From 9ad63986c606c60e2e916b1b96f22991f966d9cc Mon Sep 17 00:00:00 2001
From: Dima Zavin <dima@android.com>
Date: Sun, 10 Jul 2011 16:01:15 -0700
Subject: pda_power: Add support for using otg transceiver events

If the platform data sets the use_otg_notifier flag,
the driver will now register an otg notifier callback and listen
to transceiver events for AC/USB plug-in events instead. This would
normally be used by not specifying is_xx_online callbacks and
not specifying any irqs so the state machine is completely driven
from OTG xceiver events.

Signed-off-by: Dima Zavin <dima@android.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 include/linux/pda_power.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pda_power.h b/include/linux/pda_power.h
index c9e4d814ff77..2bb62bf296ac 100644
--- a/include/linux/pda_power.h
+++ b/include/linux/pda_power.h
@@ -35,6 +35,8 @@ struct pda_power_pdata {
 	unsigned int polling_interval; /* msecs, default is 2000 */
 
 	unsigned long ac_max_uA; /* current to draw when on AC */
+
+	bool use_otg_notifier;
 };
 
 #endif /* __PDA_POWER_H__ */
-- 
cgit v1.2.3


From 854a68521badc48460c9cbcdf37b220865836ac3 Mon Sep 17 00:00:00 2001
From: Mark Salter <msalter@redhat.com>
Date: Tue, 27 Sep 2011 12:35:21 -0400
Subject: add ELF machine define for TI C6X DSPs

Signed-off-by: Mark Salter <msalter@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/elf-em.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h
index 18bea78fe47b..8e2b7bac4378 100644
--- a/include/linux/elf-em.h
+++ b/include/linux/elf-em.h
@@ -33,6 +33,7 @@
 #define EM_H8_300	46	/* Renesas H8/300,300H,H8S */
 #define EM_MN10300	89	/* Panasonic/MEI MN10300, AM33 */
 #define EM_BLACKFIN     106     /* ADI Blackfin Processor */
+#define EM_TI_C6000	140	/* TI C6X DSPs */
 #define EM_FRV		0x5441	/* Fujitsu FR-V */
 #define EM_AVR32	0x18ad	/* Atmel AVR32 */
 
-- 
cgit v1.2.3


From 49920bc66984a512f4bcc7735a61642cd0e4d6f2 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@linux.intel.com>
Date: Thu, 13 Oct 2011 15:15:27 +0530
Subject: dmaengine: add new enum dma_transfer_direction

This new enum removes usage of dma_data_direction for dma direction. The new
enum clearly tells the DMA direction and mode.
This further paves the way for merging the dmaengine _prep operations and also
for interleaved dma.

Suggested-by: Jassi Brar <jaswinder.singh@linaro.org>
Reviewed-by: Barry Song <Baohua.Song@csr.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 include/linux/dmaengine.h | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index ace51af4369f..d946ef7f5e67 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -23,7 +23,6 @@
 
 #include <linux/device.h>
 #include <linux/uio.h>
-#include <linux/dma-direction.h>
 #include <linux/scatterlist.h>
 
 /**
@@ -75,6 +74,19 @@ enum dma_transaction_type {
 /* last transaction type for creation of the capabilities mask */
 #define DMA_TX_TYPE_END (DMA_CYCLIC + 1)
 
+/**
+ * enum dma_transfer_direction - dma transfer mode and direction indicator
+ * @DMA_MEM_TO_MEM: Async/Memcpy mode
+ * @DMA_MEM_TO_DEV: Slave mode & From Memory to Device
+ * @DMA_DEV_TO_MEM: Slave mode & From Device to Memory
+ * @DMA_DEV_TO_DEV: Slave mode & From Device to Device
+ */
+enum dma_transfer_direction {
+	DMA_MEM_TO_MEM,
+	DMA_MEM_TO_DEV,
+	DMA_DEV_TO_MEM,
+	DMA_DEV_TO_DEV,
+};
 
 /**
  * enum dma_ctrl_flags - DMA flags to augment operation preparation,
@@ -267,7 +279,7 @@ enum dma_slave_buswidth {
  * struct, if applicable.
  */
 struct dma_slave_config {
-	enum dma_data_direction direction;
+	enum dma_transfer_direction direction;
 	dma_addr_t src_addr;
 	dma_addr_t dst_addr;
 	enum dma_slave_buswidth src_addr_width;
@@ -490,11 +502,11 @@ struct dma_device {
 
 	struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)(
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction);
+		size_t period_len, enum dma_transfer_direction direction);
 	int (*device_control)(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		unsigned long arg);
 
@@ -520,7 +532,7 @@ static inline int dmaengine_slave_config(struct dma_chan *chan,
 
 static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
 	struct dma_chan *chan, void *buf, size_t len,
-	enum dma_data_direction dir, unsigned long flags)
+	enum dma_transfer_direction dir, unsigned long flags)
 {
 	struct scatterlist sg;
 	sg_init_one(&sg, buf, len);
-- 
cgit v1.2.3


From db8196df4bb6f117caa163aa73b0f16fd62290bd Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@linux.intel.com>
Date: Thu, 13 Oct 2011 22:34:23 +0530
Subject: dmaengine: move drivers to dma_transfer_direction

Fix up usage of dma direction by introducing dma_transfer_direction;
this patch moves drivers/dma/* to use the new enum.

Cc: Jassi Brar <jaswinder.singh@linaro.org>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Viresh Kumar <viresh.kumar@st.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Mika Westerberg <mika.westerberg@iki.fi>
Cc: H Hartley Sweeten <hartleys@visionengravers.com>
Cc: Li Yang <leoli@freescale.com>
Cc: Zhang Wei <zw@zh-kernel.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Cc: Shawn Guo <shawn.guo@freescale.com>
Cc: Yong Wang <yong.y.wang@intel.com>
Cc: Tomoya MORINAGA <tomoya-linux@dsn.lapis-semi.com>
Cc: Boojin Kim <boojin.kim@samsung.com>
Cc: Barry Song <Baohua.Song@csr.com>
Acked-by: Mika Westerberg <mika.westerberg@iki.fi>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@st.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 arch/arm/mach-ep93xx/include/mach/dma.h        |  6 +++---
 arch/arm/plat-nomadik/include/plat/ste_dma40.h |  4 ++--
 drivers/dma/amba-pl08x.c                       | 24 ++++++++++++------------
 drivers/dma/at_hdmac.c                         | 22 +++++++++++-----------
 drivers/dma/coh901318.c                        | 12 ++++++------
 drivers/dma/coh901318_lli.c                    | 23 +++++++++++------------
 drivers/dma/coh901318_lli.h                    |  4 ++--
 drivers/dma/dw_dmac.c                          | 14 +++++++-------
 drivers/dma/ep93xx_dma.c                       | 22 +++++++++++-----------
 drivers/dma/fsldma.c                           |  4 ++--
 drivers/dma/imx-dma.c                          | 10 +++++-----
 drivers/dma/imx-sdma.c                         | 10 +++++-----
 drivers/dma/intel_mid_dma.c                    | 14 +++++++-------
 drivers/dma/intel_mid_dma_regs.h               |  2 +-
 drivers/dma/ipu/ipu_idmac.c                    |  4 ++--
 drivers/dma/mxs-dma.c                          |  8 ++++----
 drivers/dma/pch_dma.c                          | 12 ++++++------
 drivers/dma/pl330.c                            | 18 +++++++++---------
 drivers/dma/shdma.c                            | 25 ++++++++++++-------------
 drivers/dma/ste_dma40.c                        | 26 +++++++++++++-------------
 drivers/dma/timb_dma.c                         | 18 +++++++++---------
 drivers/dma/txx9dmac.c                         | 12 ++++++------
 include/linux/amba/pl08x.h                     |  4 ++--
 include/linux/dw_dmac.h                        |  2 +-
 include/linux/sh_dma.h                         |  2 +-
 25 files changed, 150 insertions(+), 152 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-ep93xx/include/mach/dma.h b/arch/arm/mach-ep93xx/include/mach/dma.h
index 46d4d876e6fb..e82c642fa53c 100644
--- a/arch/arm/mach-ep93xx/include/mach/dma.h
+++ b/arch/arm/mach-ep93xx/include/mach/dma.h
@@ -37,7 +37,7 @@
  */
 struct ep93xx_dma_data {
 	int				port;
-	enum dma_data_direction		direction;
+	enum dma_transfer_direction	direction;
 	const char			*name;
 };
 
@@ -80,14 +80,14 @@ static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan)
  * channel supports given DMA direction. Only M2P channels have such
  * limitation, for M2M channels the direction is configurable.
  */
-static inline enum dma_data_direction
+static inline enum dma_transfer_direction
 ep93xx_dma_chan_direction(struct dma_chan *chan)
 {
 	if (!ep93xx_dma_chan_is_m2p(chan))
 		return DMA_NONE;
 
 	/* even channels are for TX, odd for RX */
-	return (chan->chan_id % 2 == 0) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+	return (chan->chan_id % 2 == 0) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
 }
 
 #endif /* __ASM_ARCH_DMA_H */
diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h
index 685c78716d95..38b041a40db4 100644
--- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h
+++ b/arch/arm/plat-nomadik/include/plat/ste_dma40.h
@@ -187,7 +187,7 @@ static inline struct
 dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan,
 					    dma_addr_t addr,
 					    unsigned int size,
-					    enum dma_data_direction direction,
+					    enum dma_transfer_direction direction,
 					    unsigned long flags)
 {
 	struct scatterlist sg;
@@ -209,7 +209,7 @@ static inline struct
 dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan,
 					    dma_addr_t addr,
 					    unsigned int size,
-					    enum dma_data_direction direction,
+					    enum dma_transfer_direction direction,
 					    unsigned long flags)
 {
 	return NULL;
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index b7cbd1ab1db1..41c62fd0680d 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -882,9 +882,9 @@ static int prep_phy_channel(struct pl08x_dma_chan *plchan,
 		ch->signal = ret;
 
 		/* Assign the flow control signal to this channel */
-		if (txd->direction == DMA_TO_DEVICE)
+		if (txd->direction == DMA_MEM_TO_DEV)
 			txd->ccfg |= ch->signal << PL080_CONFIG_DST_SEL_SHIFT;
-		else if (txd->direction == DMA_FROM_DEVICE)
+		else if (txd->direction == DMA_DEV_TO_MEM)
 			txd->ccfg |= ch->signal << PL080_CONFIG_SRC_SEL_SHIFT;
 	}
 
@@ -1102,10 +1102,10 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 
 	/* Transfer direction */
 	plchan->runtime_direction = config->direction;
-	if (config->direction == DMA_TO_DEVICE) {
+	if (config->direction == DMA_MEM_TO_DEV) {
 		addr_width = config->dst_addr_width;
 		maxburst = config->dst_maxburst;
-	} else if (config->direction == DMA_FROM_DEVICE) {
+	} else if (config->direction == DMA_DEV_TO_MEM) {
 		addr_width = config->src_addr_width;
 		maxburst = config->src_maxburst;
 	} else {
@@ -1136,7 +1136,7 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 	cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT;
 	cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT;
 
-	if (plchan->runtime_direction == DMA_FROM_DEVICE) {
+	if (plchan->runtime_direction == DMA_DEV_TO_MEM) {
 		plchan->src_addr = config->src_addr;
 		plchan->src_cctl = pl08x_cctl(cctl) | PL080_CONTROL_DST_INCR |
 			pl08x_select_bus(plchan->cd->periph_buses,
@@ -1152,7 +1152,7 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 		"configured channel %s (%s) for %s, data width %d, "
 		"maxburst %d words, LE, CCTL=0x%08x\n",
 		dma_chan_name(chan), plchan->name,
-		(config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
+		(config->direction == DMA_DEV_TO_MEM) ? "RX" : "TX",
 		addr_width,
 		maxburst,
 		cctl);
@@ -1322,7 +1322,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
 
 static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
@@ -1354,10 +1354,10 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 	 */
 	txd->direction = direction;
 
-	if (direction == DMA_TO_DEVICE) {
+	if (direction == DMA_MEM_TO_DEV) {
 		txd->cctl = plchan->dst_cctl;
 		slave_addr = plchan->dst_addr;
-	} else if (direction == DMA_FROM_DEVICE) {
+	} else if (direction == DMA_DEV_TO_MEM) {
 		txd->cctl = plchan->src_cctl;
 		slave_addr = plchan->src_addr;
 	} else {
@@ -1368,10 +1368,10 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 	}
 
 	if (plchan->cd->device_fc)
-		tmp = (direction == DMA_TO_DEVICE) ? PL080_FLOW_MEM2PER_PER :
+		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER :
 			PL080_FLOW_PER2MEM_PER;
 	else
-		tmp = (direction == DMA_TO_DEVICE) ? PL080_FLOW_MEM2PER :
+		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER :
 			PL080_FLOW_PER2MEM;
 
 	txd->ccfg |= tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT;
@@ -1387,7 +1387,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 		list_add_tail(&dsg->node, &txd->dsg_list);
 
 		dsg->len = sg_dma_len(sg);
-		if (direction == DMA_TO_DEVICE) {
+		if (direction == DMA_MEM_TO_DEV) {
 			dsg->src_addr = sg_phys(sg);
 			dsg->dst_addr = slave_addr;
 		} else {
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index fcfa0a8b5c59..7e76574e83ec 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -660,7 +660,7 @@ err_desc_get:
  */
 static struct dma_async_tx_descriptor *
 atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
@@ -678,7 +678,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 	dev_vdbg(chan2dev(chan), "prep_slave_sg (%d): %s f0x%lx\n",
 			sg_len,
-			direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE",
+			direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
 			flags);
 
 	if (unlikely(!atslave || !sg_len)) {
@@ -692,7 +692,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	ctrlb = ATC_IEN;
 
 	switch (direction) {
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		ctrla |=  ATC_DST_WIDTH(reg_width);
 		ctrlb |=  ATC_DST_ADDR_MODE_FIXED
 			| ATC_SRC_ADDR_MODE_INCR
@@ -725,7 +725,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			total_len += len;
 		}
 		break;
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		ctrla |=  ATC_SRC_WIDTH(reg_width);
 		ctrlb |=  ATC_DST_ADDR_MODE_INCR
 			| ATC_SRC_ADDR_MODE_FIXED
@@ -787,7 +787,7 @@ err_desc_get:
  */
 static int
 atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	if (period_len > (ATC_BTSIZE_MAX << reg_width))
 		goto err_out;
@@ -795,7 +795,7 @@ atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr,
 		goto err_out;
 	if (unlikely(buf_addr & ((1 << reg_width) - 1)))
 		goto err_out;
-	if (unlikely(!(direction & (DMA_TO_DEVICE | DMA_FROM_DEVICE))))
+	if (unlikely(!(direction & (DMA_DEV_TO_MEM | DMA_MEM_TO_DEV))))
 		goto err_out;
 
 	return 0;
@@ -810,7 +810,7 @@ err_out:
 static int
 atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
 		unsigned int period_index, dma_addr_t buf_addr,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	u32		ctrla;
 	unsigned int	reg_width = atslave->reg_width;
@@ -822,7 +822,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
 		| period_len >> reg_width;
 
 	switch (direction) {
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		desc->lli.saddr = buf_addr + (period_len * period_index);
 		desc->lli.daddr = atslave->tx_reg;
 		desc->lli.ctrla = ctrla;
@@ -833,7 +833,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
 				| ATC_DIF(AT_DMA_PER_IF);
 		break;
 
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		desc->lli.saddr = atslave->rx_reg;
 		desc->lli.daddr = buf_addr + (period_len * period_index);
 		desc->lli.ctrla = ctrla;
@@ -861,7 +861,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
  */
 static struct dma_async_tx_descriptor *
 atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma_slave	*atslave = chan->private;
@@ -872,7 +872,7 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 	unsigned int		i;
 
 	dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@0x%08x - %d (%d/%d)\n",
-			direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE",
+			direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
 			buf_addr,
 			periods, buf_len, period_len);
 
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index 4234f416ef11..d65a718c0f9b 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -39,7 +39,7 @@ struct coh901318_desc {
 	struct scatterlist *sg;
 	unsigned int sg_len;
 	struct coh901318_lli *lli;
-	enum dma_data_direction dir;
+	enum dma_transfer_direction dir;
 	unsigned long flags;
 	u32 head_config;
 	u32 head_ctrl;
@@ -1034,7 +1034,7 @@ coh901318_prep_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 
 static struct dma_async_tx_descriptor *
 coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-			unsigned int sg_len, enum dma_data_direction direction,
+			unsigned int sg_len, enum dma_transfer_direction direction,
 			unsigned long flags)
 {
 	struct coh901318_chan *cohc = to_coh901318_chan(chan);
@@ -1077,7 +1077,7 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	ctrl_last |= cohc->runtime_ctrl;
 	ctrl |= cohc->runtime_ctrl;
 
-	if (direction == DMA_TO_DEVICE) {
+	if (direction == DMA_MEM_TO_DEV) {
 		u32 tx_flags = COH901318_CX_CTRL_PRDD_SOURCE |
 			COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE;
 
@@ -1085,7 +1085,7 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		ctrl_chained |= tx_flags;
 		ctrl_last |= tx_flags;
 		ctrl |= tx_flags;
-	} else if (direction == DMA_FROM_DEVICE) {
+	} else if (direction == DMA_DEV_TO_MEM) {
 		u32 rx_flags = COH901318_CX_CTRL_PRDD_DEST |
 			COH901318_CX_CTRL_DST_ADDR_INC_ENABLE;
 
@@ -1274,11 +1274,11 @@ static void coh901318_dma_set_runtimeconfig(struct dma_chan *chan,
 	int i = 0;
 
 	/* We only support mem to per or per to mem transfers */
-	if (config->direction == DMA_FROM_DEVICE) {
+	if (config->direction == DMA_DEV_TO_MEM) {
 		addr = config->src_addr;
 		addr_width = config->src_addr_width;
 		maxburst = config->src_maxburst;
-	} else if (config->direction == DMA_TO_DEVICE) {
+	} else if (config->direction == DMA_MEM_TO_DEV) {
 		addr = config->dst_addr;
 		addr_width = config->dst_addr_width;
 		maxburst = config->dst_maxburst;
diff --git a/drivers/dma/coh901318_lli.c b/drivers/dma/coh901318_lli.c
index 9f7e0e6a7eea..6c0e2d4c6682 100644
--- a/drivers/dma/coh901318_lli.c
+++ b/drivers/dma/coh901318_lli.c
@@ -7,11 +7,10 @@
  * Author: Per Friden <per.friden@stericsson.com>
  */
 
-#include <linux/dma-mapping.h>
 #include <linux/spinlock.h>
-#include <linux/dmapool.h>
 #include <linux/memory.h>
 #include <linux/gfp.h>
+#include <linux/dmapool.h>
 #include <mach/coh901318.h>
 
 #include "coh901318_lli.h"
@@ -177,18 +176,18 @@ coh901318_lli_fill_single(struct coh901318_pool *pool,
 			  struct coh901318_lli *lli,
 			  dma_addr_t buf, unsigned int size,
 			  dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_eom,
-			  enum dma_data_direction dir)
+			  enum dma_transfer_direction dir)
 {
 	int s = size;
 	dma_addr_t src;
 	dma_addr_t dst;
 
 
-	if (dir == DMA_TO_DEVICE) {
+	if (dir == DMA_MEM_TO_DEV) {
 		src = buf;
 		dst = dev_addr;
 
-	} else if (dir == DMA_FROM_DEVICE) {
+	} else if (dir == DMA_DEV_TO_MEM) {
 
 		src = dev_addr;
 		dst = buf;
@@ -215,9 +214,9 @@ coh901318_lli_fill_single(struct coh901318_pool *pool,
 
 		lli = coh901318_lli_next(lli);
 
-		if (dir == DMA_TO_DEVICE)
+		if (dir == DMA_MEM_TO_DEV)
 			src += block_size;
-		else if (dir == DMA_FROM_DEVICE)
+		else if (dir == DMA_DEV_TO_MEM)
 			dst += block_size;
 	}
 
@@ -234,7 +233,7 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 		      struct scatterlist *sgl, unsigned int nents,
 		      dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl,
 		      u32 ctrl_last,
-		      enum dma_data_direction dir, u32 ctrl_irq_mask)
+		      enum dma_transfer_direction dir, u32 ctrl_irq_mask)
 {
 	int i;
 	struct scatterlist *sg;
@@ -249,9 +248,9 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 
 	spin_lock(&pool->lock);
 
-	if (dir == DMA_TO_DEVICE)
+	if (dir == DMA_MEM_TO_DEV)
 		dst = dev_addr;
-	else if (dir == DMA_FROM_DEVICE)
+	else if (dir == DMA_DEV_TO_MEM)
 		src = dev_addr;
 	else
 		goto err;
@@ -269,7 +268,7 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 			ctrl_sg = ctrl ? ctrl : ctrl_last;
 
 
-		if (dir == DMA_TO_DEVICE)
+		if (dir == DMA_MEM_TO_DEV)
 			/* increment source address */
 			src = sg_phys(sg);
 		else
@@ -293,7 +292,7 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 			lli->src_addr = src;
 			lli->dst_addr = dst;
 
-			if (dir == DMA_FROM_DEVICE)
+			if (dir == DMA_DEV_TO_MEM)
 				dst += elem_size;
 			else
 				src += elem_size;
diff --git a/drivers/dma/coh901318_lli.h b/drivers/dma/coh901318_lli.h
index 7a5c80990e9e..abff3714fdda 100644
--- a/drivers/dma/coh901318_lli.h
+++ b/drivers/dma/coh901318_lli.h
@@ -97,7 +97,7 @@ coh901318_lli_fill_single(struct coh901318_pool *pool,
 			  struct coh901318_lli *lli,
 			  dma_addr_t buf, unsigned int size,
 			  dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_last,
-			  enum dma_data_direction dir);
+			  enum dma_transfer_direction dir);
 
 /**
  * coh901318_lli_fill_single() - Prepares the lli:s for dma scatter list transfer
@@ -119,6 +119,6 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 		      struct scatterlist *sg, unsigned int nents,
 		      dma_addr_t dev_addr, u32 ctrl_chained,
 		      u32 ctrl, u32 ctrl_last,
-		      enum dma_data_direction dir, u32 ctrl_irq_mask);
+		      enum dma_transfer_direction dir, u32 ctrl_irq_mask);
 
 #endif /* COH901318_LLI_H */
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 9bfd6d360718..decca1c3c83d 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -696,7 +696,7 @@ err_desc_get:
 
 static struct dma_async_tx_descriptor *
 dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
@@ -720,7 +720,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	prev = first = NULL;
 
 	switch (direction) {
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		ctllo = (DWC_DEFAULT_CTLLO(chan->private)
 				| DWC_CTLL_DST_WIDTH(reg_width)
 				| DWC_CTLL_DST_FIX
@@ -777,7 +777,7 @@ slave_sg_todev_fill_desc:
 				goto slave_sg_todev_fill_desc;
 		}
 		break;
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		ctllo = (DWC_DEFAULT_CTLLO(chan->private)
 				| DWC_CTLL_SRC_WIDTH(reg_width)
 				| DWC_CTLL_DST_INC
@@ -1165,7 +1165,7 @@ EXPORT_SYMBOL(dw_dma_cyclic_stop);
  */
 struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 		dma_addr_t buf_addr, size_t buf_len, size_t period_len,
-		enum dma_data_direction direction)
+		enum dma_transfer_direction direction)
 {
 	struct dw_dma_chan		*dwc = to_dw_dma_chan(chan);
 	struct dw_cyclic_desc		*cdesc;
@@ -1206,7 +1206,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 		goto out_err;
 	if (unlikely(buf_addr & ((1 << reg_width) - 1)))
 		goto out_err;
-	if (unlikely(!(direction & (DMA_TO_DEVICE | DMA_FROM_DEVICE))))
+	if (unlikely(!(direction & (DMA_MEM_TO_DEV | DMA_DEV_TO_MEM))))
 		goto out_err;
 
 	retval = ERR_PTR(-ENOMEM);
@@ -1228,7 +1228,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 			goto out_err_desc_get;
 
 		switch (direction) {
-		case DMA_TO_DEVICE:
+		case DMA_MEM_TO_DEV:
 			desc->lli.dar = dws->tx_reg;
 			desc->lli.sar = buf_addr + (period_len * i);
 			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private)
@@ -1239,7 +1239,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 					| DWC_CTLL_FC(dws->fc)
 					| DWC_CTLL_INT_EN);
 			break;
-		case DMA_FROM_DEVICE:
+		case DMA_DEV_TO_MEM:
 			desc->lli.dar = buf_addr + (period_len * i);
 			desc->lli.sar = dws->rx_reg;
 			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private)
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index b47e2b803faf..009851b2aeea 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -330,7 +330,7 @@ static void m2p_fill_desc(struct ep93xx_dma_chan *edmac)
 	struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac);
 	u32 bus_addr;
 
-	if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_TO_DEVICE)
+	if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_MEM_TO_DEV)
 		bus_addr = desc->src_addr;
 	else
 		bus_addr = desc->dst_addr;
@@ -443,7 +443,7 @@ static int m2m_hw_setup(struct ep93xx_dma_chan *edmac)
 		control = (5 << M2M_CONTROL_PWSC_SHIFT);
 		control |= M2M_CONTROL_NO_HDSK;
 
-		if (data->direction == DMA_TO_DEVICE) {
+		if (data->direction == DMA_MEM_TO_DEV) {
 			control |= M2M_CONTROL_DAH;
 			control |= M2M_CONTROL_TM_TX;
 			control |= M2M_CONTROL_RSS_SSPTX;
@@ -463,7 +463,7 @@ static int m2m_hw_setup(struct ep93xx_dma_chan *edmac)
 		control |= M2M_CONTROL_RSS_IDE;
 		control |= M2M_CONTROL_PW_16;
 
-		if (data->direction == DMA_TO_DEVICE) {
+		if (data->direction == DMA_MEM_TO_DEV) {
 			/* Worst case from the UG */
 			control = (3 << M2M_CONTROL_PWSC_SHIFT);
 			control |= M2M_CONTROL_DAH;
@@ -803,8 +803,8 @@ static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan)
 			switch (data->port) {
 			case EP93XX_DMA_SSP:
 			case EP93XX_DMA_IDE:
-				if (data->direction != DMA_TO_DEVICE &&
-				    data->direction != DMA_FROM_DEVICE)
+				if (data->direction != DMA_MEM_TO_DEV &&
+				    data->direction != DMA_DEV_TO_MEM)
 					return -EINVAL;
 				break;
 			default:
@@ -952,7 +952,7 @@ fail:
  */
 static struct dma_async_tx_descriptor *
 ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-			 unsigned int sg_len, enum dma_data_direction dir,
+			 unsigned int sg_len, enum dma_transfer_direction dir,
 			 unsigned long flags)
 {
 	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
@@ -988,7 +988,7 @@ ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			goto fail;
 		}
 
-		if (dir == DMA_TO_DEVICE) {
+		if (dir == DMA_MEM_TO_DEV) {
 			desc->src_addr = sg_dma_address(sg);
 			desc->dst_addr = edmac->runtime_addr;
 		} else {
@@ -1032,7 +1032,7 @@ fail:
 static struct dma_async_tx_descriptor *
 ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 			   size_t buf_len, size_t period_len,
-			   enum dma_data_direction dir)
+			   enum dma_transfer_direction dir)
 {
 	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
 	struct ep93xx_dma_desc *desc, *first;
@@ -1065,7 +1065,7 @@ ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 			goto fail;
 		}
 
-		if (dir == DMA_TO_DEVICE) {
+		if (dir == DMA_MEM_TO_DEV) {
 			desc->src_addr = dma_addr + offset;
 			desc->dst_addr = edmac->runtime_addr;
 		} else {
@@ -1133,12 +1133,12 @@ static int ep93xx_dma_slave_config(struct ep93xx_dma_chan *edmac,
 		return -EINVAL;
 
 	switch (config->direction) {
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		width = config->src_addr_width;
 		addr = config->src_addr;
 		break;
 
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		width = config->dst_addr_width;
 		addr = config->dst_addr;
 		break;
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 8a781540590c..b98070c33ca9 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -772,7 +772,7 @@ fail:
  */
 static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
 	struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_data_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	/*
 	 * This operation is not supported on the Freescale DMA controller
@@ -819,7 +819,7 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 			return -ENXIO;
 
 		/* we set the controller burst size depending on direction */
-		if (config->direction == DMA_TO_DEVICE)
+		if (config->direction == DMA_MEM_TO_DEV)
 			size = config->dst_addr_width * config->dst_maxburst;
 		else
 			size = config->src_addr_width * config->src_maxburst;
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index d746899f36e1..678cd01dc42c 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -106,7 +106,7 @@ static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		imx_dma_disable(imxdmac->imxdma_channel);
 		return 0;
 	case DMA_SLAVE_CONFIG:
-		if (dmaengine_cfg->direction == DMA_FROM_DEVICE) {
+		if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
 			imxdmac->per_address = dmaengine_cfg->src_addr;
 			imxdmac->watermark_level = dmaengine_cfg->src_maxburst;
 			imxdmac->word_size = dmaengine_cfg->src_addr_width;
@@ -223,7 +223,7 @@ static void imxdma_free_chan_resources(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
@@ -240,7 +240,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 		dma_length += sg->length;
 	}
 
-	if (direction == DMA_FROM_DEVICE)
+	if (direction == DMA_DEV_TO_MEM)
 		dmamode = DMA_MODE_READ;
 	else
 		dmamode = DMA_MODE_WRITE;
@@ -270,7 +270,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 
 static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
 	struct imxdma_engine *imxdma = imxdmac->imxdma;
@@ -316,7 +316,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
 	imxdmac->sg_list[periods].page_link =
 		((unsigned long)imxdmac->sg_list | 0x01) & ~0x02;
 
-	if (direction == DMA_FROM_DEVICE)
+	if (direction == DMA_DEV_TO_MEM)
 		dmamode = DMA_MODE_READ;
 	else
 		dmamode = DMA_MODE_WRITE;
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index eab1fe71259e..065de5442c93 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -246,7 +246,7 @@ struct sdma_engine;
 struct sdma_channel {
 	struct sdma_engine		*sdma;
 	unsigned int			channel;
-	enum dma_data_direction		direction;
+	enum dma_transfer_direction		direction;
 	enum sdma_peripheral_type	peripheral_type;
 	unsigned int			event_id0;
 	unsigned int			event_id1;
@@ -649,7 +649,7 @@ static int sdma_load_context(struct sdma_channel *sdmac)
 	struct sdma_buffer_descriptor *bd0 = sdma->channel[0].bd;
 	int ret;
 
-	if (sdmac->direction == DMA_FROM_DEVICE) {
+	if (sdmac->direction == DMA_DEV_TO_MEM) {
 		load_address = sdmac->pc_from_device;
 	} else {
 		load_address = sdmac->pc_to_device;
@@ -910,7 +910,7 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
@@ -1007,7 +1007,7 @@ err_out:
 
 static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
@@ -1092,7 +1092,7 @@ static int sdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		sdma_disable_channel(sdmac);
 		return 0;
 	case DMA_SLAVE_CONFIG:
-		if (dmaengine_cfg->direction == DMA_FROM_DEVICE) {
+		if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
 			sdmac->per_address = dmaengine_cfg->src_addr;
 			sdmac->watermark_level = dmaengine_cfg->src_maxburst;
 			sdmac->word_size = dmaengine_cfg->src_addr_width;
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index 9e96c43a846a..6deda25fd0a8 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -394,10 +394,10 @@ static int midc_lli_fill_sg(struct intel_mid_dma_chan *midc,
 							midc->dma->block_size);
 		/*Populate SAR and DAR values*/
 		sg_phy_addr = sg_phys(sg);
-		if (desc->dirn ==  DMA_TO_DEVICE) {
+		if (desc->dirn ==  DMA_MEM_TO_DEV) {
 			lli_bloc_desc->sar  = sg_phy_addr;
 			lli_bloc_desc->dar  = mids->dma_slave.dst_addr;
-		} else if (desc->dirn ==  DMA_FROM_DEVICE) {
+		} else if (desc->dirn ==  DMA_DEV_TO_MEM) {
 			lli_bloc_desc->sar  = mids->dma_slave.src_addr;
 			lli_bloc_desc->dar  = sg_phy_addr;
 		}
@@ -631,13 +631,13 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 		if (midc->dma->pimr_mask) {
 			cfg_hi.cfgx.protctl = 0x0; /*default value*/
 			cfg_hi.cfgx.fifo_mode = 1;
-			if (mids->dma_slave.direction == DMA_TO_DEVICE) {
+			if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
 				cfg_hi.cfgx.src_per = 0;
 				if (mids->device_instance == 0)
 					cfg_hi.cfgx.dst_per = 3;
 				if (mids->device_instance == 1)
 					cfg_hi.cfgx.dst_per = 1;
-			} else if (mids->dma_slave.direction == DMA_FROM_DEVICE) {
+			} else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
 				if (mids->device_instance == 0)
 					cfg_hi.cfgx.src_per = 2;
 				if (mids->device_instance == 1)
@@ -681,11 +681,11 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 		ctl_lo.ctlx.sinc = 0;
 		ctl_lo.ctlx.dinc = 0;
 	} else {
-		if (mids->dma_slave.direction == DMA_TO_DEVICE) {
+		if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
 			ctl_lo.ctlx.sinc = 0;
 			ctl_lo.ctlx.dinc = 2;
 			ctl_lo.ctlx.tt_fc = 1;
-		} else if (mids->dma_slave.direction == DMA_FROM_DEVICE) {
+		} else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
 			ctl_lo.ctlx.sinc = 2;
 			ctl_lo.ctlx.dinc = 0;
 			ctl_lo.ctlx.tt_fc = 2;
@@ -731,7 +731,7 @@ err_desc_get:
  */
 static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
 			struct dma_chan *chan, struct scatterlist *sgl,
-			unsigned int sg_len, enum dma_data_direction direction,
+			unsigned int sg_len, enum dma_transfer_direction direction,
 			unsigned long flags)
 {
 	struct intel_mid_dma_chan *midc = NULL;
diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h
index aea5ee88ce03..c6de919a6401 100644
--- a/drivers/dma/intel_mid_dma_regs.h
+++ b/drivers/dma/intel_mid_dma_regs.h
@@ -262,7 +262,7 @@ struct intel_mid_dma_desc {
 	unsigned int			lli_length;
 	unsigned int			current_lli;
 	dma_addr_t			next;
-	enum dma_data_direction		dirn;
+	enum dma_transfer_direction		dirn;
 	enum dma_status			status;
 	enum dma_slave_buswidth		width; /*width of DMA txn*/
 	enum intel_mid_dma_mode		cfg_mode; /*mode configuration*/
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index 6815905a772f..0cee3b30cd77 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -1362,7 +1362,7 @@ static void ipu_gc_tasklet(unsigned long arg)
 /* Allocate and initialise a transfer descriptor. */
 static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan,
 		struct scatterlist *sgl, unsigned int sg_len,
-		enum dma_data_direction direction, unsigned long tx_flags)
+		enum dma_transfer_direction direction, unsigned long tx_flags)
 {
 	struct idmac_channel *ichan = to_idmac_chan(chan);
 	struct idmac_tx_desc *desc = NULL;
@@ -1374,7 +1374,7 @@ static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan
 	    chan->chan_id != IDMAC_IC_7)
 		return NULL;
 
-	if (direction != DMA_FROM_DEVICE && direction != DMA_TO_DEVICE) {
+	if (direction != DMA_DEV_TO_MEM && direction != DMA_MEM_TO_DEV) {
 		dev_err(chan->device->dev, "Invalid DMA direction %d!\n", direction);
 		return NULL;
 	}
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index b4588bdd98bb..bdf4672b2553 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -377,7 +377,7 @@ static void mxs_dma_free_chan_resources(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long append)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
@@ -450,7 +450,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
 			ccw->bits |= CCW_CHAIN;
 			ccw->bits |= CCW_HALT_ON_TERM;
 			ccw->bits |= CCW_TERM_FLUSH;
-			ccw->bits |= BF_CCW(direction == DMA_FROM_DEVICE ?
+			ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
 					MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ,
 					COMMAND);
 
@@ -472,7 +472,7 @@ err_out:
 
 static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
@@ -515,7 +515,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
 		ccw->bits |= CCW_IRQ;
 		ccw->bits |= CCW_HALT_ON_TERM;
 		ccw->bits |= CCW_TERM_FLUSH;
-		ccw->bits |= BF_CCW(direction == DMA_FROM_DEVICE ?
+		ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
 				MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND);
 
 		dma_addr += period_len;
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index a6d0e3dbed07..9944e8295498 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -99,7 +99,7 @@ struct pch_dma_desc {
 struct pch_dma_chan {
 	struct dma_chan		chan;
 	void __iomem *membase;
-	enum dma_data_direction	dir;
+	enum dma_transfer_direction dir;
 	struct tasklet_struct	tasklet;
 	unsigned long		err_status;
 
@@ -224,7 +224,7 @@ static void pdc_set_dir(struct dma_chan *chan)
 		mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
 				       (DMA_CTL0_BITS_PER_CH * chan->chan_id));
 		val &= mask_mode;
-		if (pd_chan->dir == DMA_TO_DEVICE)
+		if (pd_chan->dir == DMA_MEM_TO_DEV)
 			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
 				       DMA_CTL0_DIR_SHIFT_BITS);
 		else
@@ -242,7 +242,7 @@ static void pdc_set_dir(struct dma_chan *chan)
 		mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
 						 (DMA_CTL0_BITS_PER_CH * ch));
 		val &= mask_mode;
-		if (pd_chan->dir == DMA_TO_DEVICE)
+		if (pd_chan->dir == DMA_MEM_TO_DEV)
 			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch +
 				       DMA_CTL0_DIR_SHIFT_BITS);
 		else
@@ -607,7 +607,7 @@ static void pd_issue_pending(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan,
 			struct scatterlist *sgl, unsigned int sg_len,
-			enum dma_data_direction direction, unsigned long flags)
+			enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
 	struct pch_dma_slave *pd_slave = chan->private;
@@ -623,9 +623,9 @@ static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan,
 		return NULL;
 	}
 
-	if (direction == DMA_FROM_DEVICE)
+	if (direction == DMA_DEV_TO_MEM)
 		reg = pd_slave->rx_reg;
-	else if (direction == DMA_TO_DEVICE)
+	else if (direction == DMA_MEM_TO_DEV)
 		reg = pd_slave->tx_reg;
 	else
 		return NULL;
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 571041477ab2..1e58eeb030d8 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -320,14 +320,14 @@ static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned
 	case DMA_SLAVE_CONFIG:
 		slave_config = (struct dma_slave_config *)arg;
 
-		if (slave_config->direction == DMA_TO_DEVICE) {
+		if (slave_config->direction == DMA_MEM_TO_DEV) {
 			if (slave_config->dst_addr)
 				pch->fifo_addr = slave_config->dst_addr;
 			if (slave_config->dst_addr_width)
 				pch->burst_sz = __ffs(slave_config->dst_addr_width);
 			if (slave_config->dst_maxburst)
 				pch->burst_len = slave_config->dst_maxburst;
-		} else if (slave_config->direction == DMA_FROM_DEVICE) {
+		} else if (slave_config->direction == DMA_DEV_TO_MEM) {
 			if (slave_config->src_addr)
 				pch->fifo_addr = slave_config->src_addr;
 			if (slave_config->src_addr_width)
@@ -597,7 +597,7 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len)
 
 static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct dma_pl330_desc *desc;
 	struct dma_pl330_chan *pch = to_pchan(chan);
@@ -612,13 +612,13 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 	}
 
 	switch (direction) {
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		desc->rqcfg.src_inc = 1;
 		desc->rqcfg.dst_inc = 0;
 		src = dma_addr;
 		dst = pch->fifo_addr;
 		break;
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		desc->rqcfg.src_inc = 0;
 		desc->rqcfg.dst_inc = 1;
 		src = pch->fifo_addr;
@@ -687,7 +687,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
 
 static struct dma_async_tx_descriptor *
 pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flg)
 {
 	struct dma_pl330_desc *first, *desc = NULL;
@@ -702,9 +702,9 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		return NULL;
 
 	/* Make sure the direction is consistent */
-	if ((direction == DMA_TO_DEVICE &&
+	if ((direction == DMA_MEM_TO_DEV &&
 				peri->rqtype != MEMTODEV) ||
-			(direction == DMA_FROM_DEVICE &&
+			(direction == DMA_DEV_TO_MEM &&
 				peri->rqtype != DEVTOMEM)) {
 		dev_err(pch->dmac->pif.dev, "%s:%d Invalid Direction\n",
 				__func__, __LINE__);
@@ -747,7 +747,7 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		else
 			list_add_tail(&desc->node, &first->node);
 
-		if (direction == DMA_TO_DEVICE) {
+		if (direction == DMA_MEM_TO_DEV) {
 			desc->rqcfg.src_inc = 1;
 			desc->rqcfg.dst_inc = 0;
 			fill_px(&desc->px,
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 81809c2b46ab..592304fb41a6 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -23,7 +23,6 @@
 #include <linux/interrupt.h>
 #include <linux/dmaengine.h>
 #include <linux/delay.h>
-#include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/sh_dma.h>
@@ -479,19 +478,19 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan)
  * @sh_chan:	DMA channel
  * @flags:	DMA transfer flags
  * @dest:	destination DMA address, incremented when direction equals
- *		DMA_FROM_DEVICE or DMA_BIDIRECTIONAL
+ *		DMA_DEV_TO_MEM
  * @src:	source DMA address, incremented when direction equals
- *		DMA_TO_DEVICE or DMA_BIDIRECTIONAL
+ *		DMA_MEM_TO_DEV
  * @len:	DMA transfer length
  * @first:	if NULL, set to the current descriptor and cookie set to -EBUSY
  * @direction:	needed for slave DMA to decide which address to keep constant,
- *		equals DMA_BIDIRECTIONAL for MEMCPY
+ *		equals DMA_MEM_TO_MEM for MEMCPY
  * Returns 0 or an error
  * Locks: called with desc_lock held
  */
 static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
 	unsigned long flags, dma_addr_t *dest, dma_addr_t *src, size_t *len,
-	struct sh_desc **first, enum dma_data_direction direction)
+	struct sh_desc **first, enum dma_transfer_direction direction)
 {
 	struct sh_desc *new;
 	size_t copy_size;
@@ -531,9 +530,9 @@ static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
 	new->direction = direction;
 
 	*len -= copy_size;
-	if (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE)
+	if (direction == DMA_MEM_TO_MEM || direction == DMA_MEM_TO_DEV)
 		*src += copy_size;
-	if (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE)
+	if (direction == DMA_MEM_TO_MEM || direction == DMA_DEV_TO_MEM)
 		*dest += copy_size;
 
 	return new;
@@ -546,12 +545,12 @@ static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
  * converted to scatter-gather to guarantee consistent locking and a correct
  * list manipulation. For slave DMA direction carries the usual meaning, and,
  * logically, the SG list is RAM and the addr variable contains slave address,
- * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_BIDIRECTIONAL
+ * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_MEM_TO_MEM
  * and the SG list contains only one element and points at the source buffer.
  */
 static struct dma_async_tx_descriptor *sh_dmae_prep_sg(struct sh_dmae_chan *sh_chan,
 	struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr,
-	enum dma_data_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct scatterlist *sg;
 	struct sh_desc *first = NULL, *new = NULL /* compiler... */;
@@ -592,7 +591,7 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_sg(struct sh_dmae_chan *sh_c
 			dev_dbg(sh_chan->dev, "Add SG #%d@%p[%d], dma %llx\n",
 				i, sg, len, (unsigned long long)sg_addr);
 
-			if (direction == DMA_FROM_DEVICE)
+			if (direction == DMA_DEV_TO_MEM)
 				new = sh_dmae_add_desc(sh_chan, flags,
 						&sg_addr, addr, &len, &first,
 						direction);
@@ -646,13 +645,13 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
 	sg_dma_address(&sg) = dma_src;
 	sg_dma_len(&sg) = len;
 
-	return sh_dmae_prep_sg(sh_chan, &sg, 1, &dma_dest, DMA_BIDIRECTIONAL,
+	return sh_dmae_prep_sg(sh_chan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM,
 			       flags);
 }
 
 static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg(
 	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_data_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct sh_dmae_slave *param;
 	struct sh_dmae_chan *sh_chan;
@@ -996,7 +995,7 @@ static void dmae_do_tasklet(unsigned long data)
 	spin_lock_irq(&sh_chan->desc_lock);
 	list_for_each_entry(desc, &sh_chan->ld_queue, node) {
 		if (desc->mark == DESC_SUBMITTED &&
-		    ((desc->direction == DMA_FROM_DEVICE &&
+		    ((desc->direction == DMA_DEV_TO_MEM &&
 		      (desc->hw.dar + desc->hw.tcr) == dar_buf) ||
 		     (desc->hw.sar + desc->hw.tcr) == sar_buf)) {
 			dev_dbg(sh_chan->dev, "done #%d@%p dst %u\n",
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 467e4dcb20a0..0c6cbacb8321 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -216,7 +216,7 @@ struct d40_chan {
 	struct d40_log_lli_full		*lcpa;
 	/* Runtime reconfiguration */
 	dma_addr_t			runtime_addr;
-	enum dma_data_direction		runtime_direction;
+	enum dma_transfer_direction	runtime_direction;
 };
 
 /**
@@ -1854,7 +1854,7 @@ err:
 }
 
 static dma_addr_t
-d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction)
+d40_get_dev_addr(struct d40_chan *chan, enum dma_transfer_direction direction)
 {
 	struct stedma40_platform_data *plat = chan->base->plat_data;
 	struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
@@ -1863,9 +1863,9 @@ d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction)
 	if (chan->runtime_addr)
 		return chan->runtime_addr;
 
-	if (direction == DMA_FROM_DEVICE)
+	if (direction == DMA_DEV_TO_MEM)
 		addr = plat->dev_rx[cfg->src_dev_type];
-	else if (direction == DMA_TO_DEVICE)
+	else if (direction == DMA_MEM_TO_DEV)
 		addr = plat->dev_tx[cfg->dst_dev_type];
 
 	return addr;
@@ -1874,7 +1874,7 @@ d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction)
 static struct dma_async_tx_descriptor *
 d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src,
 	    struct scatterlist *sg_dst, unsigned int sg_len,
-	    enum dma_data_direction direction, unsigned long dma_flags)
+	    enum dma_transfer_direction direction, unsigned long dma_flags)
 {
 	struct d40_chan *chan = container_of(dchan, struct d40_chan, chan);
 	dma_addr_t src_dev_addr = 0;
@@ -1901,9 +1901,9 @@ d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src,
 	if (direction != DMA_NONE) {
 		dma_addr_t dev_addr = d40_get_dev_addr(chan, direction);
 
-		if (direction == DMA_FROM_DEVICE)
+		if (direction == DMA_DEV_TO_MEM)
 			src_dev_addr = dev_addr;
-		else if (direction == DMA_TO_DEVICE)
+		else if (direction == DMA_MEM_TO_DEV)
 			dst_dev_addr = dev_addr;
 	}
 
@@ -2107,10 +2107,10 @@ d40_prep_memcpy_sg(struct dma_chan *chan,
 static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
 							 struct scatterlist *sgl,
 							 unsigned int sg_len,
-							 enum dma_data_direction direction,
+							 enum dma_transfer_direction direction,
 							 unsigned long dma_flags)
 {
-	if (direction != DMA_FROM_DEVICE && direction != DMA_TO_DEVICE)
+	if (direction != DMA_DEV_TO_MEM && direction != DMA_MEM_TO_DEV)
 		return NULL;
 
 	return d40_prep_sg(chan, sgl, sgl, sg_len, direction, dma_flags);
@@ -2119,7 +2119,7 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
 static struct dma_async_tx_descriptor *
 dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 		     size_t buf_len, size_t period_len,
-		     enum dma_data_direction direction)
+		     enum dma_transfer_direction direction)
 {
 	unsigned int periods = buf_len / period_len;
 	struct dma_async_tx_descriptor *txd;
@@ -2268,7 +2268,7 @@ static int d40_set_runtime_config(struct dma_chan *chan,
 	dst_addr_width = config->dst_addr_width;
 	dst_maxburst = config->dst_maxburst;
 
-	if (config->direction == DMA_FROM_DEVICE) {
+	if (config->direction == DMA_DEV_TO_MEM) {
 		dma_addr_t dev_addr_rx =
 			d40c->base->plat_data->dev_rx[cfg->src_dev_type];
 
@@ -2291,7 +2291,7 @@ static int d40_set_runtime_config(struct dma_chan *chan,
 		if (dst_maxburst == 0)
 			dst_maxburst = src_maxburst;
 
-	} else if (config->direction == DMA_TO_DEVICE) {
+	} else if (config->direction == DMA_MEM_TO_DEV) {
 		dma_addr_t dev_addr_tx =
 			d40c->base->plat_data->dev_tx[cfg->dst_dev_type];
 
@@ -2356,7 +2356,7 @@ static int d40_set_runtime_config(struct dma_chan *chan,
 		"configured channel %s for %s, data width %d/%d, "
 		"maxburst %d/%d elements, LE, no flow control\n",
 		dma_chan_name(chan),
-		(config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
+		(config->direction == DMA_DEV_TO_MEM) ? "RX" : "TX",
 		src_addr_width, dst_addr_width,
 		src_maxburst, dst_maxburst);
 
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index a4a398f2ef61..8c880729b094 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -90,7 +90,7 @@ struct timb_dma_chan {
 	struct list_head	queue;
 	struct list_head	free_list;
 	unsigned int		bytes_per_line;
-	enum dma_data_direction	direction;
+	enum dma_transfer_direction	direction;
 	unsigned int		descs; /* Descriptors to allocate */
 	unsigned int		desc_elems; /* number of elems per descriptor */
 };
@@ -235,7 +235,7 @@ static void __td_start_dma(struct timb_dma_chan *td_chan)
 		"td_chan: %p, chan: %d, membase: %p\n",
 		td_chan, td_chan->chan.chan_id, td_chan->membase);
 
-	if (td_chan->direction == DMA_FROM_DEVICE) {
+	if (td_chan->direction == DMA_DEV_TO_MEM) {
 
 		/* descriptor address */
 		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_DHAR);
@@ -278,7 +278,7 @@ static void __td_finish(struct timb_dma_chan *td_chan)
 		txd->cookie);
 
 	/* make sure to stop the transfer */
-	if (td_chan->direction == DMA_FROM_DEVICE)
+	if (td_chan->direction == DMA_DEV_TO_MEM)
 		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_ER);
 /* Currently no support for stopping DMA transfers
 	else
@@ -398,7 +398,7 @@ static struct timb_dma_desc *td_alloc_init_desc(struct timb_dma_chan *td_chan)
 	td_desc->txd.flags = DMA_CTRL_ACK;
 
 	td_desc->txd.phys = dma_map_single(chan2dmadev(chan),
-		td_desc->desc_list, td_desc->desc_list_len, DMA_TO_DEVICE);
+		td_desc->desc_list, td_desc->desc_list_len, DMA_MEM_TO_DEV);
 
 	err = dma_mapping_error(chan2dmadev(chan), td_desc->txd.phys);
 	if (err) {
@@ -419,7 +419,7 @@ static void td_free_desc(struct timb_dma_desc *td_desc)
 {
 	dev_dbg(chan2dev(td_desc->txd.chan), "Freeing desc: %p\n", td_desc);
 	dma_unmap_single(chan2dmadev(td_desc->txd.chan), td_desc->txd.phys,
-		td_desc->desc_list_len, DMA_TO_DEVICE);
+		td_desc->desc_list_len, DMA_MEM_TO_DEV);
 
 	kfree(td_desc->desc_list);
 	kfree(td_desc);
@@ -558,7 +558,7 @@ static void td_issue_pending(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan,
 	struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_data_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct timb_dma_chan *td_chan =
 		container_of(chan, struct timb_dma_chan, chan);
@@ -606,7 +606,7 @@ static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan,
 	}
 
 	dma_sync_single_for_device(chan2dmadev(chan), td_desc->txd.phys,
-		td_desc->desc_list_len, DMA_TO_DEVICE);
+		td_desc->desc_list_len, DMA_MEM_TO_DEV);
 
 	return &td_desc->txd;
 }
@@ -775,8 +775,8 @@ static int __devinit td_probe(struct platform_device *pdev)
 		td_chan->descs = pchan->descriptors;
 		td_chan->desc_elems = pchan->descriptor_elements;
 		td_chan->bytes_per_line = pchan->bytes_per_line;
-		td_chan->direction = pchan->rx ? DMA_FROM_DEVICE :
-			DMA_TO_DEVICE;
+		td_chan->direction = pchan->rx ? DMA_DEV_TO_MEM :
+			DMA_MEM_TO_DEV;
 
 		td_chan->membase = td->membase +
 			(i / 2) * TIMBDMA_INSTANCE_OFFSET +
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index cbd83e362b5e..6122c364cf11 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -845,7 +845,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 
 static struct dma_async_tx_descriptor *
 txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
@@ -860,9 +860,9 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 	BUG_ON(!ds || !ds->reg_width);
 	if (ds->tx_reg)
-		BUG_ON(direction != DMA_TO_DEVICE);
+		BUG_ON(direction != DMA_MEM_TO_DEV);
 	else
-		BUG_ON(direction != DMA_FROM_DEVICE);
+		BUG_ON(direction != DMA_DEV_TO_MEM);
 	if (unlikely(!sg_len))
 		return NULL;
 
@@ -882,7 +882,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		mem = sg_dma_address(sg);
 
 		if (__is_dmac64(ddev)) {
-			if (direction == DMA_TO_DEVICE) {
+			if (direction == DMA_MEM_TO_DEV) {
 				desc->hwdesc.SAR = mem;
 				desc->hwdesc.DAR = ds->tx_reg;
 			} else {
@@ -891,7 +891,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			}
 			desc->hwdesc.CNTR = sg_dma_len(sg);
 		} else {
-			if (direction == DMA_TO_DEVICE) {
+			if (direction == DMA_MEM_TO_DEV) {
 				desc->hwdesc32.SAR = mem;
 				desc->hwdesc32.DAR = ds->tx_reg;
 			} else {
@@ -900,7 +900,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			}
 			desc->hwdesc32.CNTR = sg_dma_len(sg);
 		}
-		if (direction == DMA_TO_DEVICE) {
+		if (direction == DMA_MEM_TO_DEV) {
 			sai = ds->reg_width;
 			dai = 0;
 		} else {
diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 9eabffbc4e50..033f6aa670de 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -134,7 +134,7 @@ struct pl08x_txd {
 	struct dma_async_tx_descriptor tx;
 	struct list_head node;
 	struct list_head dsg_list;
-	enum dma_data_direction	direction;
+	enum dma_transfer_direction direction;
 	dma_addr_t llis_bus;
 	struct pl08x_lli *llis_va;
 	/* Default cctl value for LLIs */
@@ -197,7 +197,7 @@ struct pl08x_dma_chan {
 	dma_addr_t dst_addr;
 	u32 src_cctl;
 	u32 dst_cctl;
-	enum dma_data_direction	runtime_direction;
+	enum dma_transfer_direction runtime_direction;
 	dma_cookie_t lc;
 	struct list_head pend_list;
 	struct pl08x_txd *at;
diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
index 4bfe0a2f7d50..f2c64f92c4a0 100644
--- a/include/linux/dw_dmac.h
+++ b/include/linux/dw_dmac.h
@@ -127,7 +127,7 @@ struct dw_cyclic_desc {
 
 struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 		dma_addr_t buf_addr, size_t buf_len, size_t period_len,
-		enum dma_data_direction direction);
+		enum dma_transfer_direction direction);
 void dw_dma_cyclic_free(struct dma_chan *chan);
 int dw_dma_cyclic_start(struct dma_chan *chan);
 void dw_dma_cyclic_stop(struct dma_chan *chan);
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
index cb2dd118cc0f..62ef6938da10 100644
--- a/include/linux/sh_dma.h
+++ b/include/linux/sh_dma.h
@@ -30,7 +30,7 @@ struct sh_desc {
 	struct sh_dmae_regs hw;
 	struct list_head node;
 	struct dma_async_tx_descriptor async_tx;
-	enum dma_data_direction direction;
+	enum dma_transfer_direction direction;
 	dma_cookie_t cookie;
 	size_t partial;
 	int chunks;
-- 
cgit v1.2.3


From fb6c721b69d4ac518b9be6de8f44ba87a0c0d235 Mon Sep 17 00:00:00 2001
From: Kyle Manna <kyle.manna@fuel7.com>
Date: Sat, 29 Oct 2011 12:31:35 -0700
Subject: Input: tca8418_keypad - initial driver release

This driver has been tested with hardware and works as expected.  To use
it add the platform data as appropriate and register it with the
corresponding I2C bus.

Signed-off-by: Kyle Manna <kyle.manna@fuel7.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/Kconfig          |  16 ++
 drivers/input/keyboard/Makefile         |   1 +
 drivers/input/keyboard/tca8418_keypad.c | 430 ++++++++++++++++++++++++++++++++
 include/linux/input/tca8418_keypad.h    |  44 ++++
 4 files changed, 491 insertions(+)
 create mode 100644 drivers/input/keyboard/tca8418_keypad.c
 create mode 100644 include/linux/input/tca8418_keypad.h

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 615c21f2a553..90d5f0a8f882 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -221,6 +221,22 @@ config KEYBOARD_TCA6416
 	  To compile this driver as a module, choose M here: the
 	  module will be called tca6416_keypad.
 
+config KEYBOARD_TCA8418
+	tristate "TCA8418 Keypad Support"
+	depends on I2C
+	help
+	  This driver implements basic keypad functionality
+	  for keys connected through TCA8418 keypad decoder.
+
+	  Say Y here if your device has keys connected to
+	  TCA8418 keypad decoder.
+
+	  If enabled the complete TCA8418 device will be managed through
+	  this driver.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called tca8418_keypad.
+
 config KEYBOARD_MATRIX
 	tristate "GPIO driven matrix keypad support"
 	depends on GENERIC_GPIO
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index ddde0fd476f7..df7061f12918 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_KEYBOARD_EP93XX)		+= ep93xx_keypad.o
 obj-$(CONFIG_KEYBOARD_GPIO)		+= gpio_keys.o
 obj-$(CONFIG_KEYBOARD_GPIO_POLLED)	+= gpio_keys_polled.o
 obj-$(CONFIG_KEYBOARD_TCA6416)		+= tca6416-keypad.o
+obj-$(CONFIG_KEYBOARD_TCA8418)		+= tca8418_keypad.o
 obj-$(CONFIG_KEYBOARD_HIL)		+= hil_kbd.o
 obj-$(CONFIG_KEYBOARD_HIL_OLD)		+= hilkbd.o
 obj-$(CONFIG_KEYBOARD_IMX)		+= imx_keypad.o
diff --git a/drivers/input/keyboard/tca8418_keypad.c b/drivers/input/keyboard/tca8418_keypad.c
new file mode 100644
index 000000000000..958ec107bfbc
--- /dev/null
+++ b/drivers/input/keyboard/tca8418_keypad.c
@@ -0,0 +1,430 @@
+/*
+ * Driver for TCA8418 I2C keyboard
+ *
+ * Copyright (C) 2011 Fuel7, Inc.  All rights reserved.
+ *
+ * Author: Kyle Manna <kyle.manna@fuel7.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * If you can't comply with GPLv2, alternative licensing terms may be
+ * arranged. Please contact Fuel7, Inc. (http://fuel7.com/) for proprietary
+ * alternative licensing inquiries.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/input/tca8418_keypad.h>
+
+/* TCA8418 hardware limits */
+#define TCA8418_MAX_ROWS	8
+#define TCA8418_MAX_COLS	10
+
+/* TCA8418 register offsets */
+#define REG_CFG			0x01
+#define REG_INT_STAT		0x02
+#define REG_KEY_LCK_EC		0x03
+#define REG_KEY_EVENT_A		0x04
+#define REG_KEY_EVENT_B		0x05
+#define REG_KEY_EVENT_C		0x06
+#define REG_KEY_EVENT_D		0x07
+#define REG_KEY_EVENT_E		0x08
+#define REG_KEY_EVENT_F		0x09
+#define REG_KEY_EVENT_G		0x0A
+#define REG_KEY_EVENT_H		0x0B
+#define REG_KEY_EVENT_I		0x0C
+#define REG_KEY_EVENT_J		0x0D
+#define REG_KP_LCK_TIMER	0x0E
+#define REG_UNLOCK1		0x0F
+#define REG_UNLOCK2		0x10
+#define REG_GPIO_INT_STAT1	0x11
+#define REG_GPIO_INT_STAT2	0x12
+#define REG_GPIO_INT_STAT3	0x13
+#define REG_GPIO_DAT_STAT1	0x14
+#define REG_GPIO_DAT_STAT2	0x15
+#define REG_GPIO_DAT_STAT3	0x16
+#define REG_GPIO_DAT_OUT1	0x17
+#define REG_GPIO_DAT_OUT2	0x18
+#define REG_GPIO_DAT_OUT3	0x19
+#define REG_GPIO_INT_EN1	0x1A
+#define REG_GPIO_INT_EN2	0x1B
+#define REG_GPIO_INT_EN3	0x1C
+#define REG_KP_GPIO1		0x1D
+#define REG_KP_GPIO2		0x1E
+#define REG_KP_GPIO3		0x1F
+#define REG_GPI_EM1		0x20
+#define REG_GPI_EM2		0x21
+#define REG_GPI_EM3		0x22
+#define REG_GPIO_DIR1		0x23
+#define REG_GPIO_DIR2		0x24
+#define REG_GPIO_DIR3		0x25
+#define REG_GPIO_INT_LVL1	0x26
+#define REG_GPIO_INT_LVL2	0x27
+#define REG_GPIO_INT_LVL3	0x28
+#define REG_DEBOUNCE_DIS1	0x29
+#define REG_DEBOUNCE_DIS2	0x2A
+#define REG_DEBOUNCE_DIS3	0x2B
+#define REG_GPIO_PULL1		0x2C
+#define REG_GPIO_PULL2		0x2D
+#define REG_GPIO_PULL3		0x2E
+
+/* TCA8418 bit definitions */
+#define CFG_AI			BIT(7)
+#define CFG_GPI_E_CFG		BIT(6)
+#define CFG_OVR_FLOW_M		BIT(5)
+#define CFG_INT_CFG		BIT(4)
+#define CFG_OVR_FLOW_IEN	BIT(3)
+#define CFG_K_LCK_IEN		BIT(2)
+#define CFG_GPI_IEN		BIT(1)
+#define CFG_KE_IEN		BIT(0)
+
+#define INT_STAT_CAD_INT	BIT(4)
+#define INT_STAT_OVR_FLOW_INT	BIT(3)
+#define INT_STAT_K_LCK_INT	BIT(2)
+#define INT_STAT_GPI_INT	BIT(1)
+#define INT_STAT_K_INT		BIT(0)
+
+/* TCA8418 register masks */
+#define KEY_LCK_EC_KEC		0x7
+#define KEY_EVENT_CODE		0x7f
+#define KEY_EVENT_VALUE		0x80
+
+
+static const struct i2c_device_id tca8418_id[] = {
+	{ TCA8418_NAME, 8418, },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, tca8418_id);
+
+struct tca8418_keypad {
+	unsigned int rows;	/* copied from platform data in probe */
+	unsigned int cols;	/* copied from platform data in probe */
+	unsigned int keypad_mask; /* Mask for keypad col/row regs */
+	unsigned int irq;
+	unsigned int row_shift;	/* get_count_order(cols), for MATRIX_SCAN_CODE */
+
+	struct i2c_client *client;
+	struct input_dev *input;
+
+	/* Flexible array member, must be at end of struct */
+	unsigned short keymap[];
+};
+
+/*
+ * Write a byte to the TCA8418
+ */
+static int tca8418_write_byte(struct tca8418_keypad *keypad_data,
+			      int reg, u8 val)
+{
+	int error;
+
+	error = i2c_smbus_write_byte_data(keypad_data->client, reg, val);
+	if (error < 0) {
+		dev_err(&keypad_data->client->dev,
+			"%s failed, reg: %d, val: %d, error: %d\n",
+			__func__, reg, val, error);
+		return error;
+	}
+
+	return 0;
+}
+
+/*
+ * Read a byte from the TCA8418
+ */
+static int tca8418_read_byte(struct tca8418_keypad *keypad_data,
+			     int reg, u8 *val)
+{
+	int error;
+
+	error = i2c_smbus_read_byte_data(keypad_data->client, reg);
+	if (error < 0) {
+		dev_err(&keypad_data->client->dev,
+				"%s failed, reg: %d, error: %d\n",
+				__func__, reg, error);
+		return error;
+	}
+
+	*val = (u8)error;
+
+	return 0;
+}
+
+static void tca8418_read_keypad(struct tca8418_keypad *keypad_data)
+{
+	int error, col, row;
+	u8 reg, state, code;
+
+	/* Initial read of the key event FIFO */
+	error = tca8418_read_byte(keypad_data, REG_KEY_EVENT_A, &reg);
+
+	/* Assume that key code 0 signifies empty FIFO */
+	while (error >= 0 && reg > 0) {
+		state = reg & KEY_EVENT_VALUE;	/* bit 7, reported as key value */
+		code  = reg & KEY_EVENT_CODE;	/* low seven bits */
+
+		row = code / TCA8418_MAX_COLS;
+		col = code % TCA8418_MAX_COLS;
+		/* Code 1 maps to (0, 0); remainder 0 is last column of row - 1 */
+		row = (col) ? row : row - 1;
+		col = (col) ? col - 1 : TCA8418_MAX_COLS - 1;
+
+		code = MATRIX_SCAN_CODE(row, col, keypad_data->row_shift);
+		input_event(keypad_data->input, EV_MSC, MSC_SCAN, code);
+		input_report_key(keypad_data->input,
+				keypad_data->keymap[code], state);
+
+		/* Read for next loop */
+		error = tca8418_read_byte(keypad_data, REG_KEY_EVENT_A, &reg);
+	}
+
+	if (error < 0)
+		dev_err(&keypad_data->client->dev,
+			"unable to read REG_KEY_EVENT_A\n");
+
+	input_sync(keypad_data->input);
+}
+
+/*
+ * Threaded IRQ handler and this can (and will) sleep.
+ */
+static irqreturn_t tca8418_irq_handler(int irq, void *dev_id)
+{
+	struct tca8418_keypad *keypad_data = dev_id;
+	u8 reg;
+	int error;
+
+	error = tca8418_read_byte(keypad_data, REG_INT_STAT, &reg);
+	if (error) {
+		dev_err(&keypad_data->client->dev,
+			"unable to read REG_INT_STAT\n");
+		goto exit;
+	}
+
+	if (reg & INT_STAT_OVR_FLOW_INT)
+		dev_warn(&keypad_data->client->dev, "overflow occurred\n");
+
+	if (reg & INT_STAT_K_INT)
+		tca8418_read_keypad(keypad_data);
+
+exit:
+	/* Clear all interrupts, even IRQs we didn't check (GPI, CAD, LCK) */
+	reg = 0xff;
+	error = tca8418_write_byte(keypad_data, REG_INT_STAT, reg);
+	if (error)
+		dev_err(&keypad_data->client->dev,
+			"unable to clear REG_INT_STAT\n");
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Configure the TCA8418 for keypad operation
+ */
+static int __devinit tca8418_configure(struct tca8418_keypad *keypad_data)
+{
+	int reg, error;
+
+	/* Write config register, if this fails assume device not present */
+	error = tca8418_write_byte(keypad_data, REG_CFG,
+				CFG_INT_CFG | CFG_OVR_FLOW_IEN | CFG_KE_IEN);
+	if (error < 0)
+		return -ENODEV;
+
+
+	/* Assemble a mask for row and column registers */
+	reg  =  ~(~0 << keypad_data->rows);
+	reg += (~(~0 << keypad_data->cols)) << 8;
+	keypad_data->keypad_mask = reg;
+
+	/* Set registers to keypad mode */
+	error |= tca8418_write_byte(keypad_data, REG_KP_GPIO1, reg);
+	error |= tca8418_write_byte(keypad_data, REG_KP_GPIO2, reg >> 8);
+	error |= tca8418_write_byte(keypad_data, REG_KP_GPIO3, reg >> 16);
+
+	/* Enable column debouncing */
+	error |= tca8418_write_byte(keypad_data, REG_DEBOUNCE_DIS1, reg);
+	error |= tca8418_write_byte(keypad_data, REG_DEBOUNCE_DIS2, reg >> 8);
+	error |= tca8418_write_byte(keypad_data, REG_DEBOUNCE_DIS3, reg >> 16);
+
+	return error;
+}
+
+static int __devinit tca8418_keypad_probe(struct i2c_client *client,
+					  const struct i2c_device_id *id)
+{
+	const struct tca8418_keypad_platform_data *pdata =
+						client->dev.platform_data;
+	struct tca8418_keypad *keypad_data;
+	struct input_dev *input;
+	int error, row_shift, max_keys;
+
+	/* Copy the platform data */
+	if (!pdata) {
+		dev_dbg(&client->dev, "no platform data\n");
+		return -EINVAL;
+	}
+
+	if (!pdata->keymap_data) {
+		dev_err(&client->dev, "no keymap data defined\n");
+		return -EINVAL;
+	}
+
+	if (!pdata->rows || pdata->rows > TCA8418_MAX_ROWS) {
+		dev_err(&client->dev, "invalid rows\n");
+		return -EINVAL;
+	}
+
+	if (!pdata->cols || pdata->cols > TCA8418_MAX_COLS) {
+		dev_err(&client->dev, "invalid columns\n");
+		return -EINVAL;
+	}
+
+	/* Check i2c driver capabilities */
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE)) {
+		dev_err(&client->dev, "%s adapter not supported\n",
+			dev_driver_string(&client->adapter->dev));
+		return -ENODEV;
+	}
+
+	row_shift = get_count_order(pdata->cols);
+	max_keys = pdata->rows << row_shift;
+
+	/* Allocate memory for keypad_data, keymap and input device */
+	keypad_data = kzalloc(sizeof(*keypad_data) +
+			max_keys * sizeof(keypad_data->keymap[0]), GFP_KERNEL);
+	if (!keypad_data)
+		return -ENOMEM;
+
+	keypad_data->rows = pdata->rows;
+	keypad_data->cols = pdata->cols;
+	keypad_data->client = client;
+	keypad_data->row_shift = row_shift;
+
+	/* Initialize the chip or fail if chip isn't present */
+	error = tca8418_configure(keypad_data);
+	if (error < 0)
+		goto fail1;
+
+	/* Configure input device */
+	input = input_allocate_device();
+	if (!input) {
+		error = -ENOMEM;
+		goto fail1;
+	}
+	keypad_data->input = input;
+
+	input->name = client->name;
+	input->dev.parent = &client->dev;
+
+	input->id.bustype = BUS_I2C;
+	input->id.vendor  = 0x0001;
+	input->id.product = 0x001;
+	input->id.version = 0x0001;
+
+	input->keycode     = keypad_data->keymap;
+	input->keycodesize = sizeof(keypad_data->keymap[0]);
+	input->keycodemax  = max_keys;
+
+	__set_bit(EV_KEY, input->evbit);
+	if (pdata->rep)
+		__set_bit(EV_REP, input->evbit);
+
+	input_set_capability(input, EV_MSC, MSC_SCAN);
+
+	input_set_drvdata(input, keypad_data);
+
+	matrix_keypad_build_keymap(pdata->keymap_data, row_shift,
+			input->keycode, input->keybit);
+
+	if (pdata->irq_is_gpio)
+		client->irq = gpio_to_irq(client->irq);
+
+	error = request_threaded_irq(client->irq, NULL, tca8418_irq_handler,
+				     IRQF_TRIGGER_FALLING,
+				     client->name, keypad_data);
+	if (error) {
+		dev_dbg(&client->dev,
+			"Unable to claim irq %d; error %d\n",
+			client->irq, error);
+		goto fail2;
+	}
+
+	error = input_register_device(input);
+	if (error) {
+		dev_dbg(&client->dev,
+			"Unable to register input device, error: %d\n", error);
+		goto fail3;
+	}
+
+	i2c_set_clientdata(client, keypad_data);
+	return 0;
+
+fail3:
+	free_irq(client->irq, keypad_data);
+fail2:
+	input_free_device(input);
+fail1:
+	kfree(keypad_data);
+	return error;
+}
+
+static int __devexit tca8418_keypad_remove(struct i2c_client *client)
+{
+	struct tca8418_keypad *keypad_data = i2c_get_clientdata(client);
+
+	free_irq(keypad_data->client->irq, keypad_data);
+
+	input_unregister_device(keypad_data->input);
+
+	kfree(keypad_data);
+
+	return 0;
+}
+
+
+static struct i2c_driver tca8418_keypad_driver = {
+	.driver = {
+		.name	= TCA8418_NAME,
+		.owner	= THIS_MODULE,
+	},
+	.probe		= tca8418_keypad_probe,
+	.remove		= __devexit_p(tca8418_keypad_remove),
+	.id_table	= tca8418_id,
+};
+
+static int __init tca8418_keypad_init(void)
+{
+	return i2c_add_driver(&tca8418_keypad_driver);
+}
+subsys_initcall(tca8418_keypad_init);
+
+static void __exit tca8418_keypad_exit(void)
+{
+	i2c_del_driver(&tca8418_keypad_driver);
+}
+module_exit(tca8418_keypad_exit);
+
+MODULE_AUTHOR("Kyle Manna <kyle.manna@fuel7.com>");
+MODULE_DESCRIPTION("Keypad driver for TCA8418");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/input/tca8418_keypad.h b/include/linux/input/tca8418_keypad.h
new file mode 100644
index 000000000000..e71a85dc2cbd
--- /dev/null
+++ b/include/linux/input/tca8418_keypad.h
@@ -0,0 +1,44 @@
+/*
+ * TCA8418 keypad platform support
+ *
+ * Copyright (C) 2011 Fuel7, Inc.  All rights reserved.
+ *
+ * Author: Kyle Manna <kyle.manna@fuel7.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * If you can't comply with GPLv2, alternative licensing terms may be
+ * arranged. Please contact Fuel7, Inc. (http://fuel7.com/) for proprietary
+ * alternative licensing inquiries.
+ */
+
+#ifndef _TCA8418_KEYPAD_H
+#define _TCA8418_KEYPAD_H
+
+#include <linux/types.h>
+#include <linux/input/matrix_keypad.h>
+
+#define TCA8418_I2C_ADDR	0x34
+#define	TCA8418_NAME		"tca8418_keypad"
+
+struct tca8418_keypad_platform_data {
+	const struct matrix_keymap_data *keymap_data;
+	unsigned rows;
+	unsigned cols;
+	bool rep;
+	bool irq_is_gpio;
+};
+
+#endif
-- 
cgit v1.2.3


From b60503ba432b16fc84442a84e29a7aad2c0c363d Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Thu, 20 Jan 2011 12:50:14 -0500
Subject: NVMe: New driver

This driver is for devices that follow the NVM Express standard

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 Documentation/ioctl/ioctl-number.txt |    1 +
 drivers/block/Kconfig                |   11 +
 drivers/block/Makefile               |    1 +
 drivers/block/nvme.c                 | 1043 ++++++++++++++++++++++++++++++++++
 include/linux/nvme.h                 |  343 +++++++++++
 5 files changed, 1399 insertions(+)
 create mode 100644 drivers/block/nvme.c
 create mode 100644 include/linux/nvme.h

(limited to 'include/linux')

diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 54078ed96b37..4840334ea97b 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -149,6 +149,7 @@ Code  Seq#(hex)	Include File		Comments
 'M'	01-03	drivers/scsi/megaraid/megaraid_sas.h
 'M'	00-0F	drivers/video/fsl-diu-fb.h	conflict!
 'N'	00-1F	drivers/usb/scanner.h
+'N'	40-7F	drivers/block/nvme.c
 'O'     00-06   mtd/ubi-user.h		UBI
 'P'	all	linux/soundcard.h	conflict!
 'P'	60-6F	sound/sscape_ioctl.h	conflict!
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 6f07ec1c2f58..35e56e1c948f 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -315,6 +315,17 @@ config BLK_DEV_NBD
 
 	  If unsure, say N.
 
+config BLK_DEV_NVME
+	tristate "NVM Express block device"
+	depends on PCI
+	---help---
+	  The NVM Express driver is for solid state drives directly
+	  connected to the PCI or PCI Express bus.  If you know you
+	  don't have one of these, it is safe to answer N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called nvme.
+
 config BLK_DEV_OSD
 	tristate "OSD object-as-blkdev support"
 	depends on SCSI_OSD_ULD
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 76646e9a1c91..349539ad3ad9 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_XILINX_SYSACE)	+= xsysace.o
 obj-$(CONFIG_CDROM_PKTCDVD)	+= pktcdvd.o
 obj-$(CONFIG_MG_DISK)		+= mg_disk.o
 obj-$(CONFIG_SUNVDC)		+= sunvdc.o
+obj-$(CONFIG_BLK_DEV_NVME)	+= nvme.o
 obj-$(CONFIG_BLK_DEV_OSD)	+= osdblk.o
 
 obj-$(CONFIG_BLK_DEV_UMEM)	+= umem.o
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
new file mode 100644
index 000000000000..ef66eccc2aa2
--- /dev/null
+++ b/drivers/block/nvme.c
@@ -0,0 +1,1043 @@
+/*
+ * NVM Express device driver
+ * Copyright (c) 2011, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/nvme.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kdev_t.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+
+#define NVME_Q_DEPTH 1024
+#define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
+#define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
+#define NVME_MINORS 64
+
+static int nvme_major;
+module_param(nvme_major, int, 0);
+
+/*
+ * Represents an NVM Express device.  Each nvme_dev is a PCI function.
+ */
+struct nvme_dev {
+	struct list_head node;
+	struct nvme_queue **queues;
+	u32 __iomem *dbs;
+	struct pci_dev *pci_dev;
+	int instance;
+	int queue_count;
+	u32 ctrl_config;
+	struct msix_entry *entry;
+	struct nvme_bar __iomem *bar;
+	struct list_head namespaces;
+};
+
+/*
+ * An NVM Express namespace is equivalent to a SCSI LUN
+ */
+struct nvme_ns {
+	struct list_head list;
+
+	struct nvme_dev *dev;
+	struct request_queue *queue;
+	struct gendisk *disk;
+
+	int ns_id;
+	int lba_shift;
+};
+
+/*
+ * An NVM Express queue.  Each device has at least two (one for admin
+ * commands and one for I/O commands).
+ */
+struct nvme_queue {
+	struct device *q_dmadev;
+	spinlock_t q_lock;
+	struct nvme_command *sq_cmds;
+	volatile struct nvme_completion *cqes;
+	dma_addr_t sq_dma_addr;
+	dma_addr_t cq_dma_addr;
+	wait_queue_head_t sq_full;
+	struct bio_list sq_cong;
+	u32 __iomem *q_db;
+	u16 q_depth;
+	u16 cq_vector;
+	u16 sq_head;
+	u16 sq_tail;
+	u16 cq_head;
+	u16 cq_cycle;
+	unsigned long cmdid_data[];
+};
+
+/*
+ * Check we didn't inadvertently grow the command struct
+ */
+static inline void _nvme_check_size(void)
+{
+	BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
+	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
+}
+
+/**
+ * alloc_cmdid - Allocate a Command ID
+ * @param nvmeq The queue that will be used for this command
+ * @param ctx A pointer that will be passed to the handler
+ * @param handler The ID of the handler to call
+ *
+ * Allocate a Command ID for a queue.  The data passed in will
+ * be passed to the completion handler.  This is implemented by using
+ * the bottom two bits of the ctx pointer to store the handler ID.
+ * Passing in a pointer that's not 4-byte aligned will cause a BUG.
+ * We can change this if it becomes a problem.
+ */
+static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx, int handler)
+{
+	int depth = nvmeq->q_depth;
+	unsigned long data = (unsigned long)ctx | handler;
+	int cmdid;
+
+	BUG_ON((unsigned long)ctx & 3);
+
+	do {
+		cmdid = find_first_zero_bit(nvmeq->cmdid_data, depth);
+		if (cmdid >= depth)
+			return -EBUSY;
+	} while (test_and_set_bit(cmdid, nvmeq->cmdid_data));
+
+	nvmeq->cmdid_data[cmdid + BITS_TO_LONGS(depth)] = data;
+	return cmdid;
+}
+
+static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
+								int handler)
+{
+	int cmdid;
+	wait_event_killable(nvmeq->sq_full,
+			(cmdid = alloc_cmdid(nvmeq, ctx, handler)) >= 0);
+	return (cmdid < 0) ? -EINTR : cmdid;
+}
+
+/* If you need more than four handlers, you'll need to change how
+ * alloc_cmdid and nvme_process_cq work
+ */
+enum {
+	sync_completion_id = 0,
+	bio_completion_id,
+};
+
+static unsigned long free_cmdid(struct nvme_queue *nvmeq, int cmdid)
+{
+	unsigned long data;
+
+	data = nvmeq->cmdid_data[cmdid + BITS_TO_LONGS(nvmeq->q_depth)];
+	clear_bit(cmdid, nvmeq->cmdid_data);
+	wake_up(&nvmeq->sq_full);
+	return data;
+}
+
+static struct nvme_queue *get_nvmeq(struct nvme_ns *ns)
+{
+	return ns->dev->queues[1];
+}
+
+static void put_nvmeq(struct nvme_queue *nvmeq)
+{
+}
+
+/**
+ * nvme_submit_cmd: Copy a command into a queue and ring the doorbell
+ * @nvmeq: The queue to use
+ * @cmd: The command to send
+ *
+ * Safe to use from interrupt context.  Always returns 0.
+ */
+static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+{
+	unsigned long flags;
+	u16 tail;
+	/* XXX: Need to check tail isn't going to overrun head */
+	spin_lock_irqsave(&nvmeq->q_lock, flags);
+	tail = nvmeq->sq_tail;
+	memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+	if (++tail == nvmeq->q_depth)
+		tail = 0;
+	writel(tail, nvmeq->q_db);	/* doorbell takes the new tail */
+	nvmeq->sq_tail = tail;
+	spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+
+	return 0;
+}
+
+struct nvme_req_info {
+	struct bio *bio;
+	int nents;
+	struct scatterlist sg[0];
+};
+
+/* XXX: use a mempool */
+static struct nvme_req_info *alloc_info(unsigned nseg, gfp_t gfp)
+{
+	return kmalloc(sizeof(struct nvme_req_info) +
+			sizeof(struct scatterlist) * nseg, gfp);
+}
+
+static void free_info(struct nvme_req_info *info)
+{
+	kfree(info);
+}
+
+static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
+						struct nvme_completion *cqe)
+{
+	struct nvme_req_info *info = ctx;
+	struct bio *bio = info->bio;
+	u16 status = le16_to_cpup(&cqe->status) >> 1;
+
+	dma_unmap_sg(nvmeq->q_dmadev, info->sg, info->nents,
+			bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+	free_info(info);
+	bio_endio(bio, status ? -EIO : 0);
+}
+
+static int nvme_map_bio(struct device *dev, struct nvme_req_info *info,
+		struct bio *bio, enum dma_data_direction dma_dir, int psegs)
+{
+	struct bio_vec *bvec;
+	struct scatterlist *sg = info->sg;
+	int i, nsegs = 0;	/* counts bio segments; becomes info->nents */
+
+	sg_init_table(sg, psegs);
+	bio_for_each_segment(bvec, bio, i) {
+		sg_set_page(sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
+		/* XXX: sg is never advanced; handle non-mergable here */
+		nsegs++;
+	}
+	info->nents = nsegs;
+
+	return dma_map_sg(dev, info->sg, info->nents, dma_dir);
+}
+
+static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
+								struct bio *bio)
+{
+	struct nvme_rw_command *cmnd;
+	struct nvme_req_info *info;
+	enum dma_data_direction dma_dir;
+	int cmdid;
+	u16 control;
+	u32 dsmgmt;
+	unsigned long flags;
+	int psegs = bio_phys_segments(ns->queue, bio);
+
+	info = alloc_info(psegs, GFP_NOIO);
+	if (!info)
+		goto congestion;
+	info->bio = bio;
+
+	cmdid = alloc_cmdid(nvmeq, info, bio_completion_id);
+	if (unlikely(cmdid < 0))
+		goto free_info;
+
+	control = 0;
+	if (bio->bi_rw & REQ_FUA)
+		control |= NVME_RW_FUA;
+	if (bio->bi_rw & (REQ_FAILFAST_DEV | REQ_RAHEAD))
+		control |= NVME_RW_LR;
+
+	dsmgmt = 0;
+	if (bio->bi_rw & REQ_RAHEAD)
+		dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
+
+	spin_lock_irqsave(&nvmeq->q_lock, flags);
+	cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail].rw;	/* built in place */
+
+	if (bio_data_dir(bio)) {
+		cmnd->opcode = nvme_cmd_write;
+		dma_dir = DMA_TO_DEVICE;
+	} else {
+		cmnd->opcode = nvme_cmd_read;
+		dma_dir = DMA_FROM_DEVICE;
+	}
+
+	nvme_map_bio(nvmeq->q_dmadev, info, bio, dma_dir, psegs);
+
+	cmnd->flags = 1;
+	cmnd->command_id = cmdid;
+	cmnd->nsid = cpu_to_le32(ns->ns_id);
+	cmnd->prp1 = cpu_to_le64(sg_phys(info->sg));
+	/* XXX: Support more than one PRP */
+	cmnd->slba = cpu_to_le64(bio->bi_sector >> (ns->lba_shift - 9));
+	cmnd->length = cpu_to_le16((bio->bi_size >> ns->lba_shift) - 1);
+	cmnd->control = cpu_to_le16(control);
+	cmnd->dsmgmt = cpu_to_le32(dsmgmt);
+	/* The doorbell takes the new tail, so advance (and wrap) it first */
+	if (++nvmeq->sq_tail == nvmeq->q_depth)
+		nvmeq->sq_tail = 0;
+	writel(nvmeq->sq_tail, nvmeq->q_db);
+
+	spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+
+	return 0;
+
+ free_info:
+	free_info(info);
+ congestion:
+	return -EBUSY;
+}
+
+/*
+ * NB: return value of non-zero would mean that we were a stacking driver.
+ * make_request must always succeed.
+ */
+static int nvme_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct nvme_ns *ns = q->queuedata;
+	struct nvme_queue *nvmeq = get_nvmeq(ns);
+
+	if (nvme_submit_bio_queue(nvmeq, ns, bio)) {
+		blk_set_queue_congested(q, rw_is_sync(bio->bi_rw));
+		bio_list_add(&nvmeq->sq_cong, bio);
+	}
+	put_nvmeq(nvmeq);
+
+	return 0;
+}
+
+struct sync_cmd_info {
+	struct task_struct *task;
+	u32 result;
+	int status;
+};
+
+static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
+						struct nvme_completion *cqe)
+{
+	struct sync_cmd_info *cmdinfo = ctx;
+	cmdinfo->result = le32_to_cpup(&cqe->result);
+	cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
+	wake_up_process(cmdinfo->task);
+}
+
+typedef void (*completion_fn)(struct nvme_queue *, void *,
+						struct nvme_completion *);
+
+static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
+{
+	u16 head, cycle;
+
+	static const completion_fn completions[4] = {
+		[sync_completion_id] = sync_completion,
+		[bio_completion_id]  = bio_completion,
+	};
+
+	head = nvmeq->cq_head;
+	cycle = nvmeq->cq_cycle;
+
+	for (;;) {
+		unsigned long data;
+		void *ptr;
+		unsigned char handler;
+		struct nvme_completion cqe = nvmeq->cqes[head];
+		if ((le16_to_cpu(cqe.status) & 1) != cycle)
+			break;
+		nvmeq->sq_head = le16_to_cpu(cqe.sq_head);
+		if (++head == nvmeq->q_depth) {
+			head = 0;
+			cycle = !cycle;
+		}
+
+		data = free_cmdid(nvmeq, cqe.command_id);
+		handler = data & 3;
+		ptr = (void *)(data & ~3UL);
+		completions[handler](nvmeq, ptr, &cqe);
+	}
+
+	/* If the controller ignores the cq head doorbell and continuously
+	 * writes to the queue, it is theoretically possible to wrap around
+	 * the queue twice and mistakenly return IRQ_NONE.  Linux only
+	 * requires that 0.1% of your interrupts are handled, so this isn't
+	 * a big problem.
+	 */
+	if (head == nvmeq->cq_head && cycle == nvmeq->cq_cycle)
+		return IRQ_NONE;
+
+	writel(head, nvmeq->q_db + 1);
+	nvmeq->cq_head = head;
+	nvmeq->cq_cycle = cycle;
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t nvme_irq(int irq, void *data)
+{
+	return nvme_process_cq(data);
+}
+
+/*
+ * Returns 0 on success.  If the result is negative, it's a Linux error code;
+ * if the result is positive, it's an NVM Express status code
+ */
+static int nvme_submit_sync_cmd(struct nvme_queue *q, struct nvme_command *cmd,
+								u32 *result)
+{
+	int cmdid;
+	struct sync_cmd_info cmdinfo;
+
+	cmdinfo.task = current;
+	cmdinfo.status = -EINTR;
+
+	cmdid = alloc_cmdid_killable(q, &cmdinfo, sync_completion_id);
+	if (cmdid < 0)
+		return cmdid;
+	cmd->common.command_id = cmdid;
+
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	nvme_submit_cmd(q, cmd);
+	schedule();
+
+	if (result)
+		*result = cmdinfo.result;
+
+	return cmdinfo.status;
+}
+
+static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
+								u32 *result)
+{
+	return nvme_submit_sync_cmd(dev->queues[0], cmd, result);
+}
+
+static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
+{
+	int status;
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	c.delete_queue.opcode = opcode;
+	c.delete_queue.qid = cpu_to_le16(id);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+	if (status)
+		return -EIO;
+	return 0;
+}
+
+static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
+						struct nvme_queue *nvmeq)
+{
+	int status;
+	struct nvme_command c;
+	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
+
+	memset(&c, 0, sizeof(c));
+	c.create_cq.opcode = nvme_admin_create_cq;
+	c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
+	c.create_cq.cqid = cpu_to_le16(qid);
+	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
+	c.create_cq.cq_flags = cpu_to_le16(flags);
+	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+	if (status)
+		return -EIO;
+	return 0;
+}
+
+static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
+						struct nvme_queue *nvmeq)
+{
+	int status;
+	struct nvme_command c;
+	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
+
+	memset(&c, 0, sizeof(c));
+	c.create_sq.opcode = nvme_admin_create_sq;
+	c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
+	c.create_sq.sqid = cpu_to_le16(qid);
+	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
+	c.create_sq.sq_flags = cpu_to_le16(flags);
+	c.create_sq.cqid = cpu_to_le16(qid);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+	if (status)
+		return -EIO;
+	return 0;
+}
+
+static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
+{
+	return adapter_delete_queue(dev, nvme_admin_delete_cq, cqid);
+}
+
+static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
+{
+	return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
+}
+
+static void nvme_free_queue(struct nvme_dev *dev, int qid)
+{
+	struct nvme_queue *nvmeq = dev->queues[qid];
+
+	free_irq(dev->entry[nvmeq->cq_vector].vector, nvmeq);
+
+	/* Don't tell the adapter to delete the admin queue */
+	if (qid) {
+		adapter_delete_sq(dev, qid);
+		adapter_delete_cq(dev, qid);
+	}
+
+	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
+				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
+	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+	kfree(nvmeq);
+}
+
+static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
+							int depth, int vector)
+{
+	struct device *dmadev = &dev->pci_dev->dev;
+	unsigned extra = (depth + BITS_TO_LONGS(depth)) * sizeof(long);
+	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
+	if (!nvmeq)
+		return NULL;
+
+	nvmeq->cqes = dma_alloc_coherent(dmadev, CQ_SIZE(depth),
+					&nvmeq->cq_dma_addr, GFP_KERNEL);
+	if (!nvmeq->cqes)
+		goto free_nvmeq;
+	memset((void *)nvmeq->cqes, 0, CQ_SIZE(depth));
+
+	nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth),
+					&nvmeq->sq_dma_addr, GFP_KERNEL);
+	if (!nvmeq->sq_cmds)
+		goto free_cqdma;
+
+	nvmeq->q_dmadev = dmadev;
+	spin_lock_init(&nvmeq->q_lock);
+	nvmeq->cq_head = 0;
+	nvmeq->cq_cycle = 1;
+	init_waitqueue_head(&nvmeq->sq_full);
+	bio_list_init(&nvmeq->sq_cong);
+	nvmeq->q_db = &dev->dbs[qid * 2];
+	nvmeq->q_depth = depth;
+	nvmeq->cq_vector = vector;
+
+	return nvmeq;
+	/* q_depth is still 0 on the error path, so size the free from depth */
+ free_cqdma:
+	dma_free_coherent(dmadev, CQ_SIZE(depth), (void *)nvmeq->cqes,
+							nvmeq->cq_dma_addr);
+ free_nvmeq:
+	kfree(nvmeq);
+	return NULL;
+}
+
+static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev,
+					int qid, int cq_size, int vector)
+{
+	int result;
+	struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector);
+
+	/* nvme_alloc_queue() returns NULL on failure; nothing to unwind yet */
+	if (!nvmeq)
+		return NULL;
+	if (adapter_alloc_cq(dev, qid, nvmeq) < 0)
+		goto free_nvmeq;
+	if (adapter_alloc_sq(dev, qid, nvmeq) < 0)
+		goto release_cq;
+
+	result = request_irq(dev->entry[vector].vector, nvme_irq,
+				IRQF_DISABLED | IRQF_SHARED, "nvme", nvmeq);
+	if (result < 0)
+		goto release_sq;
+
+	return nvmeq;
+
+ release_sq:
+	adapter_delete_sq(dev, qid);
+ release_cq:
+	adapter_delete_cq(dev, qid);
+ free_nvmeq:
+	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
+				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
+	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+	kfree(nvmeq);
+	return NULL;
+}
+
+static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
+{
+	int result;
+	u32 aqa;
+	struct nvme_queue *nvmeq;
+
+	dev->dbs = ((void __iomem *)dev->bar) + 4096;
+	nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
+	if (!nvmeq)
+		return -ENOMEM;	/* would otherwise dereference NULL below */
+	aqa = nvmeq->q_depth - 1;
+	aqa |= aqa << 16;	/* same value in the low and high halves */
+
+	dev->ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM;
+	dev->ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
+	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+
+	writel(aqa, &dev->bar->aqa);
+	writeq(nvmeq->sq_dma_addr, &dev->bar->asq);
+	writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
+	writel(dev->ctrl_config, &dev->bar->cc);
+
+	while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
+		msleep(100);
+		if (fatal_signal_pending(current))
+			return -EINTR;	/* NOTE(review): nvmeq is not freed here */
+	}
+
+	result = request_irq(dev->entry[0].vector, nvme_irq,
+			IRQF_DISABLED | IRQF_SHARED, "nvme admin", nvmeq);
+	dev->queues[0] = nvmeq;
+	return result;
+}
+
+static int nvme_identify(struct nvme_ns *ns, void __user *addr, int cns)
+{
+	struct nvme_dev *dev = ns->dev;
+	int status;
+	struct nvme_command c;
+	void *page;
+	dma_addr_t dma_addr;
+
+	page = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
+								GFP_KERNEL);
+	if (!page)	/* do not hand an unset dma_addr to the device */
+		return -ENOMEM;
+
+	memset(&c, 0, sizeof(c));
+	c.identify.opcode = nvme_admin_identify;
+	c.identify.nsid = cns ? 0 : cpu_to_le32(ns->ns_id);
+	c.identify.prp1 = cpu_to_le64(dma_addr);
+	c.identify.cns = cpu_to_le32(cns);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+	if (status)
+		status = -EIO;
+	else if (copy_to_user(addr, page, 4096))
+		status = -EFAULT;
+
+	dma_free_coherent(&dev->pci_dev->dev, 4096, page, dma_addr);
+	return status;
+}
+
+static int nvme_get_range_type(struct nvme_ns *ns, void __user *addr)
+{
+	struct nvme_dev *dev = ns->dev;
+	int status;
+	struct nvme_command c;
+	void *page;
+	dma_addr_t dma_addr;
+
+	page = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
+								GFP_KERNEL);
+	if (!page)	/* do not hand an unset dma_addr to the device */
+		return -ENOMEM;
+
+	memset(&c, 0, sizeof(c));
+	c.features.opcode = nvme_admin_get_features;
+	c.features.nsid = cpu_to_le32(ns->ns_id);
+	c.features.prp1 = cpu_to_le64(dma_addr);
+	c.features.fid = cpu_to_le32(NVME_FEAT_LBA_RANGE);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+	/* XXX: Assuming first range for now */
+	if (status)
+		status = -EIO;
+	else if (copy_to_user(addr, page, 64))
+		status = -EFAULT;
+
+	dma_free_coherent(&dev->pci_dev->dev, 4096, page, dma_addr);
+	return status;
+}
+
+static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
+							unsigned long arg)
+{
+	struct nvme_ns *ns = bdev->bd_disk->private_data;
+
+	switch (cmd) {
+	case NVME_IOCTL_IDENTIFY_NS:
+		return nvme_identify(ns, (void __user *)arg, 0);
+	case NVME_IOCTL_IDENTIFY_CTRL:
+		return nvme_identify(ns, (void __user *)arg, 1);
+	case NVME_IOCTL_GET_RANGE_TYPE:
+		return nvme_get_range_type(ns, (void __user *)arg);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct block_device_operations nvme_fops = {
+	.owner		= THIS_MODULE,
+	.ioctl		= nvme_ioctl,
+};
+
+static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int index,
+			struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
+{
+	struct nvme_ns *ns;
+	struct gendisk *disk;
+	int lbaf;
+
+	if (rt->attributes & NVME_LBART_ATTRIB_HIDE)
+		return NULL;
+
+	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+	if (!ns)
+		return NULL;
+	ns->queue = blk_alloc_queue(GFP_KERNEL);
+	if (!ns->queue)
+		goto out_free_ns;
+	ns->queue->queue_flags = QUEUE_FLAG_DEFAULT | QUEUE_FLAG_NOMERGES |
+				QUEUE_FLAG_NONROT | QUEUE_FLAG_DISCARD;
+	blk_queue_make_request(ns->queue, nvme_make_request);
+	ns->dev = dev;
+	ns->queue->queuedata = ns;
+
+	disk = alloc_disk(NVME_MINORS);
+	if (!disk)
+		goto out_free_queue;
+	ns->ns_id = index;
+	ns->disk = disk;
+	lbaf = id->flbas & 0xf;
+	ns->lba_shift = id->lbaf[lbaf].ds;
+
+	disk->major = nvme_major;
+	disk->minors = NVME_MINORS;
+	disk->first_minor = NVME_MINORS * index;
+	disk->fops = &nvme_fops;
+	disk->private_data = ns;
+	disk->queue = ns->queue;
+	sprintf(disk->disk_name, "nvme%dn%d", dev->instance, index);
+	set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
+
+	return ns;
+
+ out_free_queue:
+	blk_cleanup_queue(ns->queue);
+ out_free_ns:
+	kfree(ns);
+	return NULL;
+}
+
+static void nvme_ns_free(struct nvme_ns *ns)
+{
+	put_disk(ns->disk);
+	blk_cleanup_queue(ns->queue);
+	kfree(ns);
+}
+
+static int set_queue_count(struct nvme_dev *dev, int sq_count, int cq_count)
+{
+	int status;
+	u32 result;
+	struct nvme_command c;
+	u32 q_count = (sq_count - 1) | ((cq_count - 1) << 16);
+
+	memset(&c, 0, sizeof(c));
+	c.features.opcode = nvme_admin_set_features;	/* a request, not a query */
+	c.features.fid = cpu_to_le32(NVME_FEAT_NUM_QUEUES);
+	c.features.dword11 = cpu_to_le32(q_count);
+
+	status = nvme_submit_admin_cmd(dev, &c, &result);
+	if (status)
+		return -EIO;
+	return min(result & 0xffff, result >> 16) + 1;
+}
+
+/* XXX: Create per-CPU queues */
+static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
+{
+	int this_cpu;
+
+	set_queue_count(dev, 1, 1);
+
+	this_cpu = get_cpu();
+	put_cpu();	/* only the CPU number is needed; queue creation may sleep */
+	dev->queues[1] = nvme_create_queue(dev, 1, NVME_Q_DEPTH, this_cpu);
+	if (!dev->queues[1])
+		return -ENOMEM;
+	dev->queue_count++;
+
+	return 0;
+}
+
+static void nvme_free_queues(struct nvme_dev *dev)
+{
+	int i;
+
+	for (i = dev->queue_count - 1; i >= 0; i--)
+		nvme_free_queue(dev, i);
+}
+
+static int __devinit nvme_dev_add(struct nvme_dev *dev)
+{
+	int res, nn, i;
+	struct nvme_ns *ns, *next;
+	void *id;
+	dma_addr_t dma_addr;
+	struct nvme_command cid, crt;
+
+	res = nvme_setup_io_queues(dev);
+	if (res)
+		return res;
+
+	/* XXX: Switch to a SG list once prp2 works */
+	id = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr,
+								GFP_KERNEL);
+	if (!id)
+		return -ENOMEM;
+
+	memset(&cid, 0, sizeof(cid));
+	cid.identify.opcode = nvme_admin_identify;
+	cid.identify.nsid = 0;
+	cid.identify.prp1 = cpu_to_le64(dma_addr);
+	cid.identify.cns = cpu_to_le32(1);
+
+	res = nvme_submit_admin_cmd(dev, &cid, NULL);
+	if (res) {
+		res = -EIO;
+		goto out_free;
+	}
+	nn = le32_to_cpup(&((struct nvme_id_ctrl *)id)->nn);
+
+	cid.identify.cns = 0;
+	memset(&crt, 0, sizeof(crt));
+	crt.features.opcode = nvme_admin_get_features;
+	crt.features.prp1 = cpu_to_le64(dma_addr + 4096);
+	crt.features.fid = cpu_to_le32(NVME_FEAT_LBA_RANGE);
+
+	for (i = 0; i < nn; i++) {
+		cid.identify.nsid = cpu_to_le32(i);
+		res = nvme_submit_admin_cmd(dev, &cid, NULL);
+		if (res)
+			continue;
+
+		if (((struct nvme_id_ns *)id)->ncap == 0)
+			continue;
+
+		crt.features.nsid = cpu_to_le32(i);
+		res = nvme_submit_admin_cmd(dev, &crt, NULL);
+		if (res)
+			continue;
+
+		ns = nvme_alloc_ns(dev, i, id, id + 4096);
+		if (ns)
+			list_add_tail(&ns->list, &dev->namespaces);
+	}
+	list_for_each_entry(ns, &dev->namespaces, list)
+		add_disk(ns->disk);
+	dma_free_coherent(&dev->pci_dev->dev, 8192, id, dma_addr);
+	return 0;
+
+ out_free:
+	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
+		list_del(&ns->list);
+		nvme_ns_free(ns);
+	}
+
+	dma_free_coherent(&dev->pci_dev->dev, 8192, id, dma_addr);
+	return res;
+}
+
+static int nvme_dev_remove(struct nvme_dev *dev)
+{
+	struct nvme_ns *ns, *next;
+
+	/* TODO: wait all I/O finished or cancel them */
+
+	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
+		list_del(&ns->list);
+		del_gendisk(ns->disk);
+		nvme_ns_free(ns);
+	}
+
+	nvme_free_queues(dev);
+
+	return 0;
+}
+
+/* XXX: Use an ida or something to let remove / add work correctly */
+static void nvme_set_instance(struct nvme_dev *dev)
+{
+	static int instance;
+	dev->instance = instance++;
+}
+
+static void nvme_release_instance(struct nvme_dev *dev)
+{
+}
+
+static int __devinit nvme_probe(struct pci_dev *pdev,
+						const struct pci_device_id *id)
+{
+	int result = -ENOMEM;
+	struct nvme_dev *dev;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+	dev->entry = kcalloc(num_possible_cpus(), sizeof(*dev->entry),
+								GFP_KERNEL);
+	if (!dev->entry)
+		goto free;
+	dev->queues = kcalloc(2, sizeof(void *), GFP_KERNEL);
+	if (!dev->queues)
+		goto free;
+
+	INIT_LIST_HEAD(&dev->namespaces);
+	dev->pci_dev = pdev;
+	pci_set_drvdata(pdev, dev);
+	dma_set_mask(&dev->pci_dev->dev, DMA_BIT_MASK(64));
+	nvme_set_instance(dev);
+
+	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
+	if (!dev->bar) {
+		result = -ENOMEM;
+		goto disable;
+	}
+
+	result = nvme_configure_admin_queue(dev);
+	if (result)
+		goto unmap;
+	dev->queue_count++;
+
+	result = nvme_dev_add(dev);
+	if (result)
+		goto delete;
+	return 0;
+
+ delete:
+	nvme_free_queues(dev);
+ unmap:
+	iounmap(dev->bar);
+ disable:
+	pci_disable_msix(pdev);
+	nvme_release_instance(dev);
+ free:
+	kfree(dev->queues);
+	kfree(dev->entry);
+	kfree(dev);
+	return result;
+}
+
+static void __devexit nvme_remove(struct pci_dev *pdev)
+{
+	struct nvme_dev *dev = pci_get_drvdata(pdev);
+	nvme_dev_remove(dev);
+	pci_disable_msix(pdev);
+	iounmap(dev->bar);
+	nvme_release_instance(dev);
+	kfree(dev->queues);
+	kfree(dev->entry);
+	kfree(dev);
+}
+
+/* These functions are yet to be implemented */
+#define nvme_error_detected NULL
+#define nvme_dump_registers NULL
+#define nvme_link_reset NULL
+#define nvme_slot_reset NULL
+#define nvme_error_resume NULL
+#define nvme_suspend NULL
+#define nvme_resume NULL
+
+static struct pci_error_handlers nvme_err_handler = {
+	.error_detected	= nvme_error_detected,
+	.mmio_enabled	= nvme_dump_registers,
+	.link_reset	= nvme_link_reset,
+	.slot_reset	= nvme_slot_reset,
+	.resume		= nvme_error_resume,
+};
+
+/* Move to pci_ids.h later */
+#define PCI_CLASS_STORAGE_EXPRESS	0x010802
+
+static DEFINE_PCI_DEVICE_TABLE(nvme_id_table) = {
+	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, nvme_id_table);
+
+static struct pci_driver nvme_driver = {
+	.name		= "nvme",
+	.id_table	= nvme_id_table,
+	.probe		= nvme_probe,
+	.remove		= __devexit_p(nvme_remove),
+	.suspend	= nvme_suspend,
+	.resume		= nvme_resume,
+	.err_handler	= &nvme_err_handler,
+};
+
+static int __init nvme_init(void)
+{
+	int result;
+
+	nvme_major = register_blkdev(nvme_major, "nvme");
+	if (nvme_major <= 0)
+		return -EBUSY;
+
+	result = pci_register_driver(&nvme_driver);
+	if (!result)
+		return 0;
+
+	unregister_blkdev(nvme_major, "nvme");
+	return result;
+}
+
+static void __exit nvme_exit(void)
+{
+	pci_unregister_driver(&nvme_driver);
+	unregister_blkdev(nvme_major, "nvme");
+}
+
+MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("0.1");
+module_init(nvme_init);
+module_exit(nvme_exit);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
new file mode 100644
index 000000000000..9ba53584f722
--- /dev/null
+++ b/include/linux/nvme.h
@@ -0,0 +1,343 @@
+/*
+ * Definitions for the NVM Express interface
+ * Copyright (c) 2011, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _LINUX_NVME_H
+#define _LINUX_NVME_H
+
+#include <linux/types.h>
+
+struct nvme_bar {
+	__u64			cap;	/* Controller Capabilities */
+	__u32			vs;	/* Version */
+	__u32			ims;	/* Interrupt Mask Set */
+	__u32			imc;	/* Interrupt Mask Clear */
+	__u32			cc;	/* Controller Configuration */
+	__u32			csts;	/* Controller Status */
+	__u32			aqa;	/* Admin Queue Attributes */
+	__u64			asq;	/* Admin SQ Base Address */
+	__u64			acq;	/* Admin CQ Base Address */
+};
+
+enum {
+	NVME_CC_ENABLE		= 1 << 0,
+	NVME_CC_CSS_NVM		= 0 << 4,
+	NVME_CC_MPS_SHIFT	= 7,
+	NVME_CC_ARB_RR		= 0 << 11,
+	NVME_CC_ARB_WRRU	= 1 << 11,
+	NVME_CC_ARB_VS		= 3 << 11,
+	NVME_CC_SHN_NONE	= 0 << 13,
+	NVME_CC_SHN_NORMAL	= 1 << 13,
+	NVME_CC_SHN_ABRUPT	= 2 << 13,
+	NVME_CSTS_RDY		= 1 << 0,
+	NVME_CSTS_CFS		= 1 << 1,
+	NVME_CSTS_SHST_NORMAL	= 0 << 2,
+	NVME_CSTS_SHST_OCCUR	= 1 << 2,
+	NVME_CSTS_SHST_CMPLT	= 2 << 2,
+};
+
+#define NVME_VS(major, minor)	(((major) << 16) | (minor))
+
+struct nvme_id_ctrl {
+	__le16			vid;
+	__le16			ssvid;
+	char			sn[20];
+	char			mn[40];
+	char			fr[8];
+	__le32			nn;
+	__u8			rab;
+	__u8			rsvd77[178];
+	__le16			oacs;
+	__u8			acl;
+	__u8			aerl;
+	__u8			frmw;
+	__u8			lpa;
+	__u8			elpe;
+	__u8			npss;
+	__u8			rsvd264[248];
+	__le64			psd[32];
+	__le16			oncs;
+	__le16			fuses;
+	__u8			fna;
+	__u8			vwc;
+	__le16			awun;
+	__le16			awupf;
+	__u8			rsvd778[246];
+	__u8			cmdset[2048];
+	__u8			vs[1024];
+};
+
+struct nvme_lbaf {
+	__le16			ms;
+	__u8			ds;
+	__u8			rp;
+};
+
+struct nvme_id_ns {
+	__le64			nsze;
+	__le64			ncap;
+	__le64			nuse;
+	__u8			nsfeat;
+	__u8			nlbaf;
+	__u8			flbas;
+	__u8			mc;
+	__u8			dpc;
+	__u8			dps;
+	__u8			rsvd30[98];
+	struct nvme_lbaf	lbaf[16];
+	__u8			rsvd192[192];
+	__u8			vs[3712];
+};
+
+enum {
+	NVME_NS_FEAT_THIN	= 1 << 0,
+	NVME_LBAF_RP_BEST	= 0,
+	NVME_LBAF_RP_BETTER	= 1,
+	NVME_LBAF_RP_GOOD	= 2,
+	NVME_LBAF_RP_DEGRADED	= 3,
+};
+
+struct nvme_lba_range_type {
+	__u8			type;
+	__u8			attributes;
+	__u8			rsvd2[14];
+	__le64			slba;
+	__le64			nlb;
+	__u8			guid[16];
+	__u8			rsvd48[16];
+};
+
+enum {
+	NVME_LBART_TYPE_FS	= 0x01,
+	NVME_LBART_TYPE_RAID	= 0x02,
+	NVME_LBART_TYPE_CACHE	= 0x03,
+	NVME_LBART_TYPE_SWAP	= 0x04,
+
+	NVME_LBART_ATTRIB_TEMP	= 1 << 0,
+	NVME_LBART_ATTRIB_HIDE	= 1 << 1,
+};
+
+/* I/O commands */
+
+enum nvme_opcode {
+	nvme_cmd_flush		= 0x00,
+	nvme_cmd_write		= 0x01,
+	nvme_cmd_read		= 0x02,
+	nvme_cmd_write_uncor	= 0x04,
+	nvme_cmd_compare	= 0x05,
+	nvme_cmd_dsm		= 0x09,
+};
+
+struct nvme_rw_command {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2;
+	__le64			metadata;
+	__le64			prp1;
+	__le64			prp2;
+	__le64			slba;
+	__le16			length;
+	__le16			control;
+	__le32			dsmgmt;
+	__le32			reftag;
+	__le16			apptag;
+	__le16			appmask;
+};
+
+enum {
+	NVME_RW_LR			= 1 << 15,
+	NVME_RW_FUA			= 1 << 14,
+	NVME_RW_DSM_FREQ_UNSPEC		= 0,
+	NVME_RW_DSM_FREQ_TYPICAL	= 1,
+	NVME_RW_DSM_FREQ_RARE		= 2,
+	NVME_RW_DSM_FREQ_READS		= 3,
+	NVME_RW_DSM_FREQ_WRITES		= 4,
+	NVME_RW_DSM_FREQ_RW		= 5,
+	NVME_RW_DSM_FREQ_ONCE		= 6,
+	NVME_RW_DSM_FREQ_PREFETCH	= 7,
+	NVME_RW_DSM_FREQ_TEMP		= 8,
+	NVME_RW_DSM_LATENCY_NONE	= 0 << 4,
+	NVME_RW_DSM_LATENCY_IDLE	= 1 << 4,
+	NVME_RW_DSM_LATENCY_NORM	= 2 << 4,
+	NVME_RW_DSM_LATENCY_LOW		= 3 << 4,
+	NVME_RW_DSM_SEQ_REQ		= 1 << 6,
+	NVME_RW_DSM_COMPRESSED		= 1 << 7,
+};
+
+/* Admin commands */
+
+enum nvme_admin_opcode {
+	nvme_admin_delete_sq		= 0x00,
+	nvme_admin_create_sq		= 0x01,
+	nvme_admin_get_features		= 0x02,
+	nvme_admin_delete_cq		= 0x04,
+	nvme_admin_create_cq		= 0x05,
+	nvme_admin_identify		= 0x06,
+	nvme_admin_abort_cmd		= 0x08,
+	nvme_admin_set_features		= 0x09,
+	nvme_admin_get_log_page		= 0x0a,
+	nvme_admin_async_event		= 0x0c,
+	nvme_admin_download_fw		= 0x0d,
+	nvme_admin_security_recv	= 0x0e,
+	nvme_admin_format_nvm		= 0x10,
+	nvme_admin_security_send	= 0x11,
+	nvme_admin_activate_fw		= 0x14,
+};
+
+enum {
+	NVME_QUEUE_PHYS_CONTIG	= (1 << 0),
+	NVME_CQ_IRQ_ENABLED	= (1 << 1),
+	NVME_SQ_PRIO_URGENT	= (0 << 1),
+	NVME_SQ_PRIO_HIGH	= (1 << 1),
+	NVME_SQ_PRIO_MEDIUM	= (2 << 1),
+	NVME_SQ_PRIO_LOW	= (3 << 1),
+	NVME_FEAT_ARBITRATION	= 0x01,
+	NVME_FEAT_POWER_MGMT	= 0x02,
+	NVME_FEAT_LBA_RANGE	= 0x03,
+	NVME_FEAT_TEMP_THRESH	= 0x04,
+	NVME_FEAT_ERR_RECOVERY	= 0x05,
+	NVME_FEAT_VOLATILE_WC	= 0x06,
+	NVME_FEAT_NUM_QUEUES	= 0x07,
+	NVME_FEAT_IRQ_COALESCE	= 0x08,
+	NVME_FEAT_IRQ_CONFIG	= 0x09,
+	NVME_FEAT_WRITE_ATOMIC	= 0x0a,
+	NVME_FEAT_ASYNC_EVENT	= 0x0b,
+	NVME_FEAT_SW_PROGRESS	= 0x0c,
+};
+
+struct nvme_identify {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2[2];
+	__le64			prp1;
+	__le64			prp2;
+	__le32			cns;
+	__u32			rsvd11[5];
+};
+
+struct nvme_features {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2[2];
+	__le64			prp1;
+	__le64			prp2;
+	__le32			fid;
+	__le32			dword11;
+	__u32			rsvd12[4];
+};
+
+struct nvme_create_cq {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			rsvd1[5];
+	__le64			prp1;
+	__u64			rsvd8;
+	__le16			cqid;
+	__le16			qsize;
+	__le16			cq_flags;
+	__le16			irq_vector;
+	__u32			rsvd12[4];
+};
+
+struct nvme_create_sq {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			rsvd1[5];
+	__le64			prp1;
+	__u64			rsvd8;
+	__le16			sqid;
+	__le16			qsize;
+	__le16			sq_flags;
+	__le16			cqid;
+	__le32			rsvd12[4];
+};
+
+struct nvme_delete_queue {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__u32			rsvd1[9];
+	__le16			qid;
+	__le16			rsvd10;
+	__le32			rsvd11[5];
+};
+
+struct nvme_common_command {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u32			rsvd2[14];
+};
+
+struct nvme_command {
+	union {
+		struct nvme_common_command common;
+		struct nvme_rw_command rw;
+		struct nvme_identify identify;
+		struct nvme_features features;
+		struct nvme_create_cq create_cq;
+		struct nvme_create_sq create_sq;
+		struct nvme_delete_queue delete_queue;
+	};
+};
+
+/* XXX: Sync with spec */
+enum {
+	NVME_SC_SUCCESS			= 0x0,
+	NVME_SC_INVALID_OPCODE		= 0x1,
+	NVME_SC_INVALID_FIELD		= 0x2,
+	NVME_SC_CMDID_CONFLICT		= 0x3,
+	NVME_SC_DATA_XFER_ERROR		= 0x4,
+	NVME_SC_POWER_LOSS		= 0x5,
+	NVME_SC_INTERNAL		= 0x6,
+	NVME_SC_ABORT_REQ		= 0x7,
+	NVME_SC_ABORT_QUEUE		= 0x8,
+	NVME_SC_FUSED_FAIL		= 0x9,
+	NVME_SC_FUSED_MISSING		= 0xa,
+	NVME_SC_LBA_RANGE		= 0x80,
+	NVME_SC_CAP_EXCEEDED		= 0x81,
+	NVME_SC_NS_NOT_READY		= 0x82,
+	NVME_SC_CQ_INVALID		= 0x100,
+	NVME_SC_QID_INVALID		= 0x101,
+	NVME_SC_QUEUE_SIZE		= 0x102,
+	NVME_SC_WRITE_FAULT		= 0x280,
+	NVME_SC_READ_ERROR		= 0x281,
+};
+
+struct nvme_completion {
+	__le32	result;		/* Used by admin commands to return data */
+	__le32	rsvd;
+	__le16	sq_head;	/* how much of this queue may be reclaimed */
+	__le16	sq_id;		/* submission queue that generated this entry */
+	__u16	command_id;	/* of the command which completed */
+	__le16	status;		/* did the command fail, and if so, why? */
+};
+
+#define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
+#define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
+#define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
+
+#endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From 7b4fe9b1cb4b9a6f4ae23a12ef96d08d96e2a5da Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Wed, 26 Jan 2011 10:01:21 -0500
Subject: NVMe: Make nvme_common_command more featureful

Add prp1, prp2 and the metadata prp to the common command, since the
fields are generally used this way.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 9ba53584f722..1c0b5ef08959 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -142,6 +142,18 @@ enum nvme_opcode {
 	nvme_cmd_dsm		= 0x09,
 };
 
+struct nvme_common_command {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2;
+	__le64			metadata;
+	__le64			prp1;
+	__le64			prp2;
+	__u32			rsvd10[6];
+};
+
 struct nvme_rw_command {
 	__u8			opcode;
 	__u8			flags;
@@ -284,14 +296,6 @@ struct nvme_delete_queue {
 	__le32			rsvd11[5];
 };
 
-struct nvme_common_command {
-	__u8			opcode;
-	__u8			flags;
-	__u16			command_id;
-	__le32			nsid;
-	__u32			rsvd2[14];
-};
-
 struct nvme_command {
 	union {
 		struct nvme_common_command common;
-- 
cgit v1.2.3


From a53295b6998f62d961c29e54051c1cf1d738c2b3 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Tue, 1 Feb 2011 16:13:29 -0500
Subject: NVMe: Add NVME_IOCTL_SUBMIT_IO

Allow userspace to submit synchronous I/O like the SCSI sg interface does.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/nvme.h | 18 ++++++++++++++++++
 2 files changed, 61 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index f44d6cd87ea2..40fb2e1bdfe4 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -780,6 +780,47 @@ static int nvme_get_range_type(struct nvme_ns *ns, unsigned long addr)
 	return nvme_submit_user_admin_command(ns->dev, addr, 4096, &c);
 }
 
+static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
+{
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_queue *nvmeq;
+	struct nvme_user_io io;
+	struct nvme_command c;
+	unsigned length;
+	u32 result = 0;	/* copied to userspace unconditionally below */
+	int nents, status;
+	struct scatterlist *sg;
+
+	if (copy_from_user(&io, uio, sizeof(io)))
+		return -EFAULT;
+	length = io.nblocks << io.block_shift;
+	nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length, &sg);
+	if (nents < 0)
+		return nents;
+
+	memset(&c, 0, sizeof(c));
+	c.rw.opcode = io.opcode;
+	c.rw.flags = io.flags;
+	c.rw.nsid = cpu_to_le32(io.nsid);
+	c.rw.slba = cpu_to_le64(io.slba);
+	c.rw.length = cpu_to_le16(io.nblocks - 1);
+	c.rw.control = cpu_to_le16(io.control);
+	c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);	/* 32-bit field on both sides */
+	c.rw.reftag = cpu_to_le32(io.reftag);	/* XXX: endian? */
+	c.rw.apptag = cpu_to_le16(io.apptag);
+	c.rw.appmask = cpu_to_le16(io.appmask);
+	/* XXX: metadata */
+	nvme_setup_prps(&c.common, sg, length);
+
+	nvmeq = get_nvmeq(ns);
+	status = nvme_submit_sync_cmd(nvmeq, &c, &result);
+	put_nvmeq(nvmeq);
+
+	nvme_unmap_user_pages(dev, io.opcode & 1, io.addr, length, sg, nents);
+	put_user(result, &uio->result);
+	return status;
+}
+
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 							unsigned long arg)
 {
@@ -792,6 +833,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 		return nvme_identify(ns, arg, 1);
 	case NVME_IOCTL_GET_RANGE_TYPE:
 		return nvme_get_range_type(ns, arg);
+	case NVME_IOCTL_SUBMIT_IO:
+		return nvme_submit_io(ns, (void __user *)arg);
 	default:
 		return -ENOTTY;
 	}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 1c0b5ef08959..0aaecb059d14 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -340,8 +340,26 @@ struct nvme_completion {
 	__le16	status;		/* did the command fail, and if so, why? */
 };
 
+struct nvme_user_io {
+	__u8	opcode;
+	__u8	flags;
+	__u16	control;
+	__u32	nsid;
+	__u64	metadata;
+	__u64	addr;
+	__u64	slba;
+	__u16	nblocks;
+	__u16	block_shift;
+	__u32	dsmgmt;
+	__u32	reftag;
+	__u16	apptag;
+	__u16	appmask;
+	__u32	result;
+};
+
 #define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
 #define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
 #define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
+#define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_rw_command)
 
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From 7a63e07b9a98b77dd075e06b93c1d8dc871ddad5 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Thu, 3 Feb 2011 09:20:57 -0500
Subject: NVMe: Add remaining status codes

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 0aaecb059d14..dbbdc126401b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -321,14 +321,29 @@ enum {
 	NVME_SC_ABORT_QUEUE		= 0x8,
 	NVME_SC_FUSED_FAIL		= 0x9,
 	NVME_SC_FUSED_MISSING		= 0xa,
+	NVME_SC_INVALID_NS		= 0xb,
 	NVME_SC_LBA_RANGE		= 0x80,
 	NVME_SC_CAP_EXCEEDED		= 0x81,
 	NVME_SC_NS_NOT_READY		= 0x82,
 	NVME_SC_CQ_INVALID		= 0x100,
 	NVME_SC_QID_INVALID		= 0x101,
 	NVME_SC_QUEUE_SIZE		= 0x102,
+	NVME_SC_ABORT_LIMIT		= 0x103,
+	NVME_SC_ABORT_MISSING		= 0x104,
+	NVME_SC_ASYNC_LIMIT		= 0x105,
+	NVME_SC_FIRMWARE_SLOT		= 0x106,
+	NVME_SC_FIRMWARE_IMAGE		= 0x107,
+	NVME_SC_INVALID_VECTOR		= 0x108,
+	NVME_SC_INVALID_LOG_PAGE	= 0x109,
+	NVME_SC_INVALID_FORMAT		= 0x10a,
+	NVME_SC_BAD_ATTRIBUTES		= 0x180,
 	NVME_SC_WRITE_FAULT		= 0x280,
 	NVME_SC_READ_ERROR		= 0x281,
+	NVME_SC_GUARD_CHECK		= 0x282,
+	NVME_SC_APPTAG_CHECK		= 0x283,
+	NVME_SC_REFTAG_CHECK		= 0x284,
+	NVME_SC_COMPARE_FAILED		= 0x285,
+	NVME_SC_ACCESS_DENIED		= 0x286,
 };
 
 struct nvme_completion {
-- 
cgit v1.2.3


From 6ee44cdced04a53dc4f27eb97067e6cd33784726 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Thu, 3 Feb 2011 10:58:26 -0500
Subject: NVMe: Add download / activate firmware ioctls

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nvme.h | 33 +++++++++++++++++++++++++++------
 2 files changed, 72 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 744db3877c42..7cdf7f69cdcd 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -829,6 +829,47 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	return status;
 }
 
+static int nvme_download_firmware(struct nvme_ns *ns,
+						struct nvme_dlfw __user *udlfw)
+{
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_dlfw dlfw;
+	struct nvme_command c;
+	int nents, status;
+	struct scatterlist *sg;
+
+	if (copy_from_user(&dlfw, udlfw, sizeof(dlfw)))
+		return -EFAULT;
+	if (dlfw.length >= (1 << 30))
+		return -EINVAL;
+
+	nents = nvme_map_user_pages(dev, 1, dlfw.addr, dlfw.length * 4, &sg);
+	if (nents < 0)
+		return nents;
+
+	memset(&c, 0, sizeof(c));
+	c.dlfw.opcode = nvme_admin_download_fw;
+	c.dlfw.numd = cpu_to_le32(dlfw.length);
+	c.dlfw.offset = cpu_to_le32(dlfw.offset);
+	nvme_setup_prps(&c.common, sg, dlfw.length * 4);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+	/* NOTE(review): flag assumed to be the direction; keep equal to the map call */
+	nvme_unmap_user_pages(dev, 1, dlfw.addr, dlfw.length * 4, sg, nents);
+	return status;
+}
+
+static int nvme_activate_firmware(struct nvme_ns *ns, unsigned long arg)
+{
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = nvme_admin_activate_fw;
+	c.common.rsvd10[0] = cpu_to_le32(arg);
+
+	return nvme_submit_admin_cmd(dev, &c, NULL);
+}
+
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 							unsigned long arg)
 {
@@ -843,6 +884,10 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 		return nvme_get_range_type(ns, arg);
 	case NVME_IOCTL_SUBMIT_IO:
 		return nvme_submit_io(ns, (void __user *)arg);
+	case NVME_IOCTL_DOWNLOAD_FW:
+		return nvme_download_firmware(ns, (void __user *)arg);
+	case NVME_IOCTL_ACTIVATE_FW:
+		return nvme_activate_firmware(ns, arg);
 	default:
 		return -ENOTTY;
 	}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index dbbdc126401b..8eed0e432eef 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -262,7 +262,7 @@ struct nvme_create_cq {
 	__u8			opcode;
 	__u8			flags;
 	__u16			command_id;
-	__le32			rsvd1[5];
+	__u32			rsvd1[5];
 	__le64			prp1;
 	__u64			rsvd8;
 	__le16			cqid;
@@ -276,14 +276,14 @@ struct nvme_create_sq {
 	__u8			opcode;
 	__u8			flags;
 	__u16			command_id;
-	__le32			rsvd1[5];
+	__u32			rsvd1[5];
 	__le64			prp1;
 	__u64			rsvd8;
 	__le16			sqid;
 	__le16			qsize;
 	__le16			sq_flags;
 	__le16			cqid;
-	__le32			rsvd12[4];
+	__u32			rsvd12[4];
 };
 
 struct nvme_delete_queue {
@@ -292,8 +292,20 @@ struct nvme_delete_queue {
 	__u16			command_id;
 	__u32			rsvd1[9];
 	__le16			qid;
-	__le16			rsvd10;
-	__le32			rsvd11[5];
+	__u16			rsvd10;
+	__u32			rsvd11[5];
+};
+
+struct nvme_download_firmware {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__u32			rsvd1[5];
+	__le64			prp1;
+	__le64			prp2;
+	__le32			numd;
+	__le32			offset;
+	__u32			rsvd12[4];
 };
 
 struct nvme_command {
@@ -305,6 +317,7 @@ struct nvme_command {
 		struct nvme_create_cq create_cq;
 		struct nvme_create_sq create_sq;
 		struct nvme_delete_queue delete_queue;
+		struct nvme_download_firmware dlfw;
 	};
 };
 
@@ -348,7 +361,7 @@ enum {
 
 struct nvme_completion {
 	__le32	result;		/* Used by admin commands to return data */
-	__le32	rsvd;
+	__u32	rsvd;
 	__le16	sq_head;	/* how much of this queue may be reclaimed */
 	__le16	sq_id;		/* submission queue that generated this entry */
 	__u16	command_id;	/* of the command which completed */
@@ -372,9 +385,17 @@ struct nvme_user_io {
 	__u32	result;
 };
 
+struct nvme_dlfw {
+	__u64	addr;
+	__u32	length;	/* In dwords */
+	__u32	offset;	/* In dwords */
+};
+
 #define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
 #define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
 #define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
 #define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_rw_command)
+#define NVME_IOCTL_DOWNLOAD_FW	_IOR('N', 0x44, struct nvme_dlfw)
+#define NVME_IOCTL_ACTIVATE_FW	_IO('N', 0x45)
 
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From 897cfe1ce7db152fa6dde576f4213a6160bf6502 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Mon, 14 Feb 2011 12:20:15 -0500
Subject: NVMe: Update BAR structure to match the current spec

Add two reserved registers in the middle of the BAR to match the 1.0
spec plus ECN 0002.

Also rename IMS and IMC to INTMS and INTMC to conform with the spec.
We still don't need to use them :-)

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 8eed0e432eef..757faa71666e 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -24,10 +24,12 @@
 struct nvme_bar {
 	__u64			cap;	/* Controller Capabilities */
 	__u32			vs;	/* Version */
-	__u32			ims;	/* Interrupt Mask Set */
-	__u32			imc;	/* Interrupt Mask Clear */
+	__u32			intms;	/* Interrupt Mask Set */
+	__u32			intmc;	/* Interrupt Mask Clear */
 	__u32			cc;	/* Controller Configuration */
+	__u32			rsvd1;	/* Reserved */
 	__u32			csts;	/* Controller Status */
+	__u32			rsvd2;	/* Reserved */
 	__u32			aqa;	/* Admin Queue Attributes */
 	__u64			asq;	/* Admin SQ Base Address */
 	__u64			acq;	/* Admin CQ Base Address */
-- 
cgit v1.2.3


From 2ddc4f74d8adcf3e1cdec7f3e72d19b5c878597c Mon Sep 17 00:00:00 2001
From: Krzysztof Wierzbicki <krzysztof.wierzbicki@intel.com>
Date: Mon, 28 Feb 2011 08:27:13 +0100
Subject: NVMe: Update admin opcodes to match the 1.0RC spec

Signed-off-by: Krzysztof Wierzbicki <krzysztof.wierzbicki@intel.com>
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 757faa71666e..c46a9b7988fb 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -199,19 +199,19 @@ enum {
 enum nvme_admin_opcode {
 	nvme_admin_delete_sq		= 0x00,
 	nvme_admin_create_sq		= 0x01,
-	nvme_admin_get_features		= 0x02,
+	nvme_admin_get_log_page		= 0x02,
 	nvme_admin_delete_cq		= 0x04,
 	nvme_admin_create_cq		= 0x05,
 	nvme_admin_identify		= 0x06,
 	nvme_admin_abort_cmd		= 0x08,
 	nvme_admin_set_features		= 0x09,
-	nvme_admin_get_log_page		= 0x0a,
+	nvme_admin_get_features		= 0x0a,
 	nvme_admin_async_event		= 0x0c,
-	nvme_admin_download_fw		= 0x0d,
-	nvme_admin_security_recv	= 0x0e,
-	nvme_admin_format_nvm		= 0x10,
-	nvme_admin_security_send	= 0x11,
-	nvme_admin_activate_fw		= 0x14,
+	nvme_admin_activate_fw		= 0x10,
+	nvme_admin_download_fw		= 0x11,
+	nvme_admin_format_nvm		= 0x80,
+	nvme_admin_security_send	= 0x81,
+	nvme_admin_security_recv	= 0x82,
 };
 
 enum {
-- 
cgit v1.2.3


From 19e899b2f9f89f4a290dd5c9c24d15987a18ab21 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Wed, 16 Mar 2011 16:29:24 -0400
Subject: NVMe: Remove outdated comments

The head can never overrun the tail since we won't allocate enough command
IDs to let that happen.  The status codes are in sync with the spec.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 1 -
 include/linux/nvme.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 740a9c1b81aa..d4f95eb51dc1 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -245,7 +245,6 @@ static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
 {
 	unsigned long flags;
 	u16 tail;
-	/* XXX: Need to check tail isn't going to overrun head */
 	spin_lock_irqsave(&nvmeq->q_lock, flags);
 	tail = nvmeq->sq_tail;
 	memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index c46a9b7988fb..6b5a8d19daf5 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -323,7 +323,6 @@ struct nvme_command {
 	};
 };
 
-/* XXX: Sync with spec */
 enum {
 	NVME_SC_SUCCESS			= 0x0,
 	NVME_SC_INVALID_OPCODE		= 0x1,
-- 
cgit v1.2.3


From 9d4af1b7796ba02b73a79a8694399e5a3cd1c55d Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Sun, 20 Mar 2011 07:27:10 -0400
Subject: NVMe: Correct the definitions of two ioctls

NVME_IOCTL_SUBMIT_IO has a struct nvme_user_io, not a struct nvme_rw_command
as a parameter, and NVME_IOCTL_DOWNLOAD_FW is a Write, not a Read.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 6b5a8d19daf5..fd10d597cca7 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -395,8 +395,8 @@ struct nvme_dlfw {
 #define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
 #define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
 #define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
-#define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_rw_command)
-#define NVME_IOCTL_DOWNLOAD_FW	_IOR('N', 0x44, struct nvme_dlfw)
+#define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_user_io)
+#define NVME_IOCTL_DOWNLOAD_FW	_IOW('N', 0x44, struct nvme_dlfw)
 #define NVME_IOCTL_ACTIVATE_FW	_IO('N', 0x45)
 
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From 6c7d49455ceb63064f992347d9185ff5bf43497a Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Mon, 21 Mar 2011 09:48:57 -0400
Subject: NVMe: Change the definition of nvme_user_io

The read and write commands don't define a 'result', so there's no need
to copy it back to userspace.

Remove the ability of the ioctl to submit commands to a different
namespace; it's just asking for trouble, and the use case I have in mind
will be addressed through a different ioctl in the future.  That removes
the need for both the block_shift and nsid arguments.

Check that the opcode is one of 'read' or 'write'.  Other opcodes may
be added in the future, but we will need a different structure definition
for them.

The nblocks field is redefined to be 0-based.  This allows the user to
request the full 65536 blocks.

Don't byteswap the reftag, apptag and appmask.  Martin Petersen tells
me these are calculated in big-endian and are transmitted to the device
in big-endian.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 27 +++++++++++++++++----------
 include/linux/nvme.h |  8 +++-----
 2 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index d0b52622e261..90a96ec8a596 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -1035,29 +1035,37 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	struct nvme_user_io io;
 	struct nvme_command c;
 	unsigned length;
-	u32 result;
 	int nents, status;
 	struct scatterlist *sg;
 	struct nvme_prps *prps;
 
 	if (copy_from_user(&io, uio, sizeof(io)))
 		return -EFAULT;
-	length = io.nblocks << io.block_shift;
-	nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length, &sg);
+	length = (io.nblocks + 1) << ns->lba_shift;
+
+	switch (io.opcode) {
+	case nvme_cmd_write:
+	case nvme_cmd_read:
+		nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr,
+								length, &sg);
+	default:
+		return -EFAULT;
+	}
+
 	if (nents < 0)
 		return nents;
 
 	memset(&c, 0, sizeof(c));
 	c.rw.opcode = io.opcode;
 	c.rw.flags = io.flags;
-	c.rw.nsid = cpu_to_le32(io.nsid);
+	c.rw.nsid = cpu_to_le32(ns->ns_id);
 	c.rw.slba = cpu_to_le64(io.slba);
-	c.rw.length = cpu_to_le16(io.nblocks - 1);
+	c.rw.length = cpu_to_le16(io.nblocks);
 	c.rw.control = cpu_to_le16(io.control);
 	c.rw.dsmgmt = cpu_to_le16(io.dsmgmt);
-	c.rw.reftag = cpu_to_le32(io.reftag);	/* XXX: endian? */
-	c.rw.apptag = cpu_to_le16(io.apptag);
-	c.rw.appmask = cpu_to_le16(io.appmask);
+	c.rw.reftag = io.reftag;
+	c.rw.apptag = io.apptag;
+	c.rw.appmask = io.appmask;
 	/* XXX: metadata */
 	prps = nvme_setup_prps(dev, &c.common, sg, length);
 
@@ -1069,11 +1077,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	 * additional races since q_lock already protects against other CPUs.
 	 */
 	put_nvmeq(nvmeq);
-	status = nvme_submit_sync_cmd(nvmeq, &c, &result, IO_TIMEOUT);
+	status = nvme_submit_sync_cmd(nvmeq, &c, NULL, IO_TIMEOUT);
 
 	nvme_unmap_user_pages(dev, io.opcode & 1, io.addr, length, sg, nents);
 	nvme_free_prps(dev, prps);
-	put_user(result, &uio->result);
 	return status;
 }
 
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index fd10d597cca7..347ad5f9a721 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -373,17 +373,15 @@ struct nvme_user_io {
 	__u8	opcode;
 	__u8	flags;
 	__u16	control;
-	__u32	nsid;
+	__u16	nblocks;
+	__u16	rsvd;
 	__u64	metadata;
 	__u64	addr;
 	__u64	slba;
-	__u16	nblocks;
-	__u16	block_shift;
 	__u32	dsmgmt;
 	__u32	reftag;
 	__u16	apptag;
 	__u16	appmask;
-	__u32	result;
 };
 
 struct nvme_dlfw {
@@ -395,7 +393,7 @@ struct nvme_dlfw {
 #define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
 #define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
 #define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
-#define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_user_io)
+#define NVME_IOCTL_SUBMIT_IO	_IOW('N', 0x43, struct nvme_user_io)
 #define NVME_IOCTL_DOWNLOAD_FW	_IOW('N', 0x44, struct nvme_dlfw)
 #define NVME_IOCTL_ACTIVATE_FW	_IO('N', 0x45)
 
-- 
cgit v1.2.3


From 7f53f9d2424533256ae86f7df5661a17de743de8 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Tue, 22 Mar 2011 15:55:45 -0400
Subject: NVMe: Correct the Controller Configuration settings

The arbitration field was extended by one bit, shifting the shutdown
notification bits by one.  Also, the SQ/CQ entry size was made
configurable for future extensions.

Reported-by: Paul Luse <paul.e.luse@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c |  1 +
 include/linux/nvme.h | 10 ++++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index d3eeca5a3c4c..014a7f6e39bc 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -905,6 +905,7 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 	dev->ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM;
 	dev->ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
 	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
 
 	writel(0, &dev->bar->cc);
 	writel(aqa, &dev->bar->aqa);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 347ad5f9a721..9d6febb91521 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -41,10 +41,12 @@ enum {
 	NVME_CC_MPS_SHIFT	= 7,
 	NVME_CC_ARB_RR		= 0 << 11,
 	NVME_CC_ARB_WRRU	= 1 << 11,
-	NVME_CC_ARB_VS		= 3 << 11,
-	NVME_CC_SHN_NONE	= 0 << 13,
-	NVME_CC_SHN_NORMAL	= 1 << 13,
-	NVME_CC_SHN_ABRUPT	= 2 << 13,
+	NVME_CC_ARB_VS		= 7 << 11,
+	NVME_CC_SHN_NONE	= 0 << 14,
+	NVME_CC_SHN_NORMAL	= 1 << 14,
+	NVME_CC_SHN_ABRUPT	= 2 << 14,
+	NVME_CC_IOSQES		= 6 << 16,
+	NVME_CC_IOCQES		= 4 << 20,
 	NVME_CSTS_RDY		= 1 << 0,
 	NVME_CSTS_CFS		= 1 << 1,
 	NVME_CSTS_SHST_NORMAL	= 0 << 2,
-- 
cgit v1.2.3


From 22605f96810d073eb74051d0295b6577d6a6a563 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Tue, 19 Apr 2011 15:04:20 -0400
Subject: NVMe: Time out initialisation after a few seconds

The device reports (in its capability register) how long it will take
to initialise.  If that time elapses before the ready bit becomes set,
conclude the device is broken and refuse to initialise it.  Log a nice
error message so the user knows why we did nothing.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 10 ++++++++++
 include/linux/nvme.h |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index bcc780ac4ec0..57f2b33a47dd 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -893,6 +893,8 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
 	u32 aqa;
+	u64 cap;
+	unsigned long timeout;
 	struct nvme_queue *nvmeq;
 
 	dev->dbs = ((void __iomem *)dev->bar) + 4096;
@@ -915,10 +917,18 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 	writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
 	writel(dev->ctrl_config, &dev->bar->cc);
 
+	cap = readq(&dev->bar->cap);
+	timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+
 	while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
 		msleep(100);
 		if (fatal_signal_pending(current))
 			return -EINTR;
+		if (time_after(jiffies, timeout)) {
+			dev_err(&dev->pci_dev->dev,
+				"Device not ready; aborting initialisation\n");
+			return -ENODEV;
+		}
 	}
 
 	result = queue_request_irq(dev, nvmeq, "nvme admin");
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 9d6febb91521..a19304fefa7d 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -35,6 +35,8 @@ struct nvme_bar {
 	__u64			acq;	/* Admin CQ Base Address */
 };
 
+#define NVME_CAP_TIMEOUT(cap)	(((cap) >> 24) & 0xff)
+
 enum {
 	NVME_CC_ENABLE		= 1 << 0,
 	NVME_CC_CSS_NVM		= 0 << 4,
-- 
cgit v1.2.3


From 6bbf1acddeed0bfb345a5578f9fcada16f1e514f Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Fri, 20 May 2011 13:03:42 -0400
Subject: NVMe: Rework ioctls

Remove the special-purpose IDENTIFY, GET_RANGE_TYPE, DOWNLOAD_FIRMWARE
and ACTIVATE_FIRMWARE commands.  Replace them with a generic ADMIN_CMD
ioctl that can submit any admin command.

Add a new ID ioctl that returns the namespace ID of the queried device.
It corresponds to the SCSI Idlun ioctl.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 128 ++++++++++++++++-----------------------------------
 include/linux/nvme.h |  34 +++++++++-----
 2 files changed, 63 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index f5e51a6116e3..9e3c724b95c3 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -1033,51 +1033,6 @@ static void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
 		put_page(sg_page(&sg[i]));
 }
 
-static int nvme_submit_user_admin_command(struct nvme_dev *dev,
-					unsigned long addr, unsigned length,
-					struct nvme_command *cmd)
-{
-	int err, nents, tmplen = length;
-	struct scatterlist *sg;
-	struct nvme_prps *prps;
-
-	nents = nvme_map_user_pages(dev, 0, addr, length, &sg);
-	if (nents < 0)
-		return nents;
-	prps = nvme_setup_prps(dev, &cmd->common, sg, &tmplen, GFP_KERNEL);
-	if (tmplen != length)
-		err = -ENOMEM;
-	else
-		err = nvme_submit_admin_cmd(dev, cmd, NULL);
-	nvme_unmap_user_pages(dev, 0, addr, length, sg, nents);
-	nvme_free_prps(dev, prps);
-	return err ? -EIO : 0;
-}
-
-static int nvme_identify(struct nvme_ns *ns, unsigned long addr, int cns)
-{
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.identify.opcode = nvme_admin_identify;
-	c.identify.nsid = cns ? 0 : cpu_to_le32(ns->ns_id);
-	c.identify.cns = cpu_to_le32(cns);
-
-	return nvme_submit_user_admin_command(ns->dev, addr, 4096, &c);
-}
-
-static int nvme_get_range_type(struct nvme_ns *ns, unsigned long addr)
-{
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.features.opcode = nvme_admin_get_features;
-	c.features.nsid = cpu_to_le32(ns->ns_id);
-	c.features.fid = cpu_to_le32(NVME_FEAT_LBA_RANGE);
-
-	return nvme_submit_user_admin_command(ns->dev, addr, 4096, &c);
-}
-
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
 	struct nvme_dev *dev = ns->dev;
@@ -1096,10 +1051,11 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	switch (io.opcode) {
 	case nvme_cmd_write:
 	case nvme_cmd_read:
+	case nvme_cmd_compare:
 		nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr,
 								length, &sg);
 	default:
-		return -EFAULT;
+		return -EINVAL;
 	}
 
 	if (nents < 0)
@@ -1137,70 +1093,66 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	return status;
 }
 
-static int nvme_download_firmware(struct nvme_ns *ns,
-						struct nvme_dlfw __user *udlfw)
+static int nvme_user_admin_cmd(struct nvme_ns *ns,
+					struct nvme_admin_cmd __user *ucmd)
 {
 	struct nvme_dev *dev = ns->dev;
-	struct nvme_dlfw dlfw;
+	struct nvme_admin_cmd cmd;
 	struct nvme_command c;
-	int nents, status, length;
+	int status, length, nents = 0;
 	struct scatterlist *sg;
-	struct nvme_prps *prps;
+	struct nvme_prps *prps = NULL;
 
-	if (copy_from_user(&dlfw, udlfw, sizeof(dlfw)))
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
 		return -EFAULT;
-	if (dlfw.length >= (1 << 30))
-		return -EINVAL;
-	length = dlfw.length * 4;
-
-	nents = nvme_map_user_pages(dev, 1, dlfw.addr, length, &sg);
-	if (nents < 0)
-		return nents;
 
 	memset(&c, 0, sizeof(c));
-	c.dlfw.opcode = nvme_admin_download_fw;
-	c.dlfw.numd = cpu_to_le32(dlfw.length);
-	c.dlfw.offset = cpu_to_le32(dlfw.offset);
-	prps = nvme_setup_prps(dev, &c.common, sg, &length, GFP_KERNEL);
-	if (length != dlfw.length * 4)
+	c.common.opcode = cmd.opcode;
+	c.common.flags = cmd.flags;
+	c.common.nsid = cpu_to_le32(cmd.nsid);
+	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
+	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
+	c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
+	c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
+	c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
+	c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
+	c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
+	c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
+
+	length = cmd.data_len;
+	if (cmd.data_len) {
+		nents = nvme_map_user_pages(dev, 1, cmd.addr, length, &sg);
+		if (nents < 0)
+			return nents;
+		prps = nvme_setup_prps(dev, &c.common, sg, &length, GFP_KERNEL);
+	}
+
+	if (length != cmd.data_len)
 		status = -ENOMEM;
 	else
 		status = nvme_submit_admin_cmd(dev, &c, NULL);
-	nvme_unmap_user_pages(dev, 0, dlfw.addr, dlfw.length * 4, sg, nents);
-	nvme_free_prps(dev, prps);
+	if (cmd.data_len) {
+		nvme_unmap_user_pages(dev, 0, cmd.addr, cmd.data_len, sg,
+									nents);
+		nvme_free_prps(dev, prps);
+	}
 	return status;
 }
 
-static int nvme_activate_firmware(struct nvme_ns *ns, unsigned long arg)
-{
-	struct nvme_dev *dev = ns->dev;
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_admin_activate_fw;
-	c.common.rsvd10[0] = cpu_to_le32(arg);
-
-	return nvme_submit_admin_cmd(dev, &c, NULL);
-}
-
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 							unsigned long arg)
 {
 	struct nvme_ns *ns = bdev->bd_disk->private_data;
 
 	switch (cmd) {
-	case NVME_IOCTL_IDENTIFY_NS:
-		return nvme_identify(ns, arg, 0);
-	case NVME_IOCTL_IDENTIFY_CTRL:
-		return nvme_identify(ns, arg, 1);
-	case NVME_IOCTL_GET_RANGE_TYPE:
-		return nvme_get_range_type(ns, arg);
+	case NVME_IOCTL_ID:
+		return ns->ns_id;
+	case NVME_IOCTL_ADMIN_CMD:
+		return nvme_user_admin_cmd(ns, (void __user *)arg);
 	case NVME_IOCTL_SUBMIT_IO:
 		return nvme_submit_io(ns, (void __user *)arg);
-	case NVME_IOCTL_DOWNLOAD_FW:
-		return nvme_download_firmware(ns, (void __user *)arg);
-	case NVME_IOCTL_ACTIVATE_FW:
-		return nvme_activate_firmware(ns, arg);
 	default:
 		return -ENOTTY;
 	}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index a19304fefa7d..c96ab0f5ef6f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -153,11 +153,11 @@ struct nvme_common_command {
 	__u8			flags;
 	__u16			command_id;
 	__le32			nsid;
-	__u64			rsvd2;
+	__u32			cdw2[2];
 	__le64			metadata;
 	__le64			prp1;
 	__le64			prp2;
-	__u32			rsvd10[6];
+	__u32			cdw10[6];
 };
 
 struct nvme_rw_command {
@@ -388,17 +388,29 @@ struct nvme_user_io {
 	__u16	appmask;
 };
 
-struct nvme_dlfw {
+struct nvme_admin_cmd {
+	__u8	opcode;
+	__u8	flags;
+	__u16	rsvd1;
+	__u32	nsid;
+	__u32	cdw2;
+	__u32	cdw3;
+	__u64	metadata;
 	__u64	addr;
-	__u32	length;	/* In dwords */
-	__u32	offset;	/* In dwords */
+	__u32	metadata_len;
+	__u32	data_len;
+	__u32	cdw10;
+	__u32	cdw11;
+	__u32	cdw12;
+	__u32	cdw13;
+	__u32	cdw14;
+	__u32	cdw15;
+	__u32	timeout_ms;
+	__u32	result;
 };
 
-#define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
-#define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
-#define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
-#define NVME_IOCTL_SUBMIT_IO	_IOW('N', 0x43, struct nvme_user_io)
-#define NVME_IOCTL_DOWNLOAD_FW	_IOW('N', 0x44, struct nvme_dlfw)
-#define NVME_IOCTL_ACTIVATE_FW	_IO('N', 0x45)
+#define NVME_IOCTL_ID		_IO('N', 0x40)
+#define NVME_IOCTL_ADMIN_CMD	_IOWR('N', 0x41, struct nvme_admin_cmd)
+#define NVME_IOCTL_SUBMIT_IO	_IOW('N', 0x42, struct nvme_user_io)
 
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From f1938f6e1ee1583c87ec74dc406fdd8694e99ac8 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Thu, 20 Oct 2011 17:00:41 -0400
Subject: NVMe: Implement doorbell stride capability

The doorbell stride allows devices to spread out their doorbells instead
of packing them tightly.  This feature was added as part of ECN 003.

This patch also enables support for more than 512 queues :-)

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 17 ++++++++++++++---
 include/linux/nvme.h |  1 +
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index cfe5932821d8..a17f80fa3881 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -70,6 +70,7 @@ struct nvme_dev {
 	struct dma_pool *prp_small_pool;
 	int instance;
 	int queue_count;
+	int db_stride;
 	u32 ctrl_config;
 	struct msix_entry *entry;
 	struct nvme_bar __iomem *bar;
@@ -672,7 +673,7 @@ static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
 	if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
 		return IRQ_NONE;
 
-	writel(head, nvmeq->q_db + 1);
+	writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride));
 	nvmeq->cq_head = head;
 	nvmeq->cq_phase = phase;
 
@@ -889,7 +890,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	init_waitqueue_head(&nvmeq->sq_full);
 	init_waitqueue_entry(&nvmeq->sq_cong_wait, nvme_thread);
 	bio_list_init(&nvmeq->sq_cong);
-	nvmeq->q_db = &dev->dbs[qid * 2];
+	nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
 	nvmeq->q_depth = depth;
 	nvmeq->cq_vector = vector;
 
@@ -981,6 +982,7 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 
 	cap = readq(&dev->bar->cap);
 	timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+	dev->db_stride = NVME_CAP_STRIDE(cap);
 
 	while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
 		msleep(100);
@@ -1357,7 +1359,7 @@ static int set_queue_count(struct nvme_dev *dev, int count)
 
 static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
 {
-	int result, cpu, i, nr_io_queues;
+	int result, cpu, i, nr_io_queues, db_bar_size;
 
 	nr_io_queues = num_online_cpus();
 	result = set_queue_count(dev, nr_io_queues);
@@ -1369,6 +1371,15 @@ static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
 	/* Deregister the admin queue's interrupt */
 	free_irq(dev->entry[0].vector, dev->queues[0]);
 
+	db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
+	if (db_bar_size > 8192) {
+		iounmap(dev->bar);
+		dev->bar = ioremap(pci_resource_start(dev->pci_dev, 0),
+								db_bar_size);
+		dev->dbs = ((void __iomem *)dev->bar) + 4096;
+		dev->queues[0]->q_db = dev->dbs;
+	}
+
 	for (i = 0; i < nr_io_queues; i++)
 		dev->entry[i].entry = i;
 	for (;;) {
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index c96ab0f5ef6f..2a2c535c8345 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -36,6 +36,7 @@ struct nvme_bar {
 };
 
 #define NVME_CAP_TIMEOUT(cap)	(((cap) >> 24) & 0xff)
+#define NVME_CAP_STRIDE(cap)	(((cap) >> 32) & 0xf)
 
 enum {
 	NVME_CC_ENABLE		= 1 << 0,
-- 
cgit v1.2.3


From 010e646ba2fdfc558048a97da746381c35836280 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Fri, 4 Nov 2011 16:24:23 -0400
Subject: NVMe: Update Identify Controller data structure

The driver was still using an old definition of Identify Controller
which only came to light once we started using the 'number of namespaces'
field properly.

Reported-by: Nisheeth Bhat <nisheeth.bhat@intel.com>
Reported-by: Khosrow Panah <Khosrow.Panah@idt.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2a2c535c8345..9490a00529f4 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -57,6 +57,18 @@ enum {
 	NVME_CSTS_SHST_CMPLT	= 2 << 2,
 };
 
+struct nvme_id_power_state {
+	__le16			max_power;	/* centiwatts */
+	__u16			rsvd2;
+	__le32			entry_lat;	/* microseconds */
+	__le32			exit_lat;	/* microseconds */
+	__u8			read_tput;
+	__u8			read_lat;
+	__u8			write_tput;
+	__u8			write_lat;
+	__u8			rsvd16[16];
+};
+
 #define NVME_VS(major, minor)	(major << 16 | minor)
 
 struct nvme_id_ctrl {
@@ -65,9 +77,11 @@ struct nvme_id_ctrl {
 	char			sn[20];
 	char			mn[40];
 	char			fr[8];
-	__le32			nn;
 	__u8			rab;
-	__u8			rsvd77[178];
+	__u8			ieee[3];
+	__u8			mic;
+	__u8			mdts;
+	__u8			rsvd78[178];
 	__le16			oacs;
 	__u8			acl;
 	__u8			aerl;
@@ -76,15 +90,18 @@ struct nvme_id_ctrl {
 	__u8			elpe;
 	__u8			npss;
 	__u8			rsvd264[248];
-	__le64			psd[32];
+	__u8			sqes;
+	__u8			cqes;
+	__u8			rsvd514[2];
+	__le32			nn;
 	__le16			oncs;
 	__le16			fuses;
 	__u8			fna;
 	__u8			vwc;
 	__le16			awun;
 	__le16			awupf;
-	__u8			rsvd778[246];
-	__u8			cmdset[2048];
+	__u8			rsvd530[1518];
+	struct nvme_id_power_state	psd[32];
 	__u8			vs[1024];
 };
 
-- 
cgit v1.2.3


From 5ce3e312ec5c11abce13215be70700778bf601f0 Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Date: Wed, 31 Aug 2011 14:05:16 +0300
Subject: crypto: GnuPG based MPI lib - header files (part 2)

Adds the multi-precision-integer maths library which was originally taken
from GnuPG and ported to the kernel by (among others) David Howells.
This version is taken from Fedora kernel 2.6.32-71.14.1.el6.
The difference is that checkpatch reported errors and warnings have been fixed.

This library is used to implement RSA digital signature verification
used in IMA/EVM integrity protection subsystem.

Due to patch size limitation, the patch is divided into 4 parts.

Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
---
 include/linux/mpi.h    |  146 +++++
 lib/mpi/longlong.h     | 1478 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/mpi/mpi-inline.h   |  122 ++++
 lib/mpi/mpi-internal.h |  261 +++++++++
 4 files changed, 2007 insertions(+)
 create mode 100644 include/linux/mpi.h
 create mode 100644 lib/mpi/longlong.h
 create mode 100644 lib/mpi/mpi-inline.h
 create mode 100644 lib/mpi/mpi-internal.h

(limited to 'include/linux')

diff --git a/include/linux/mpi.h b/include/linux/mpi.h
new file mode 100644
index 000000000000..06f88994ccaa
--- /dev/null
+++ b/include/linux/mpi.h
@@ -0,0 +1,146 @@
+/* mpi.h  -  Multi Precision Integers
+ *	Copyright (C) 1994, 1996, 1998, 1999,
+ *                    2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#ifndef G10_MPI_H
+#define G10_MPI_H
+
+#include <linux/types.h>
+
+/* DSI defines */
+
+#define SHA1_DIGEST_LENGTH   20
+
+/*end of DSI defines */
+
+#define BYTES_PER_MPI_LIMB	(BITS_PER_LONG / 8)
+#define BITS_PER_MPI_LIMB	BITS_PER_LONG
+
+typedef unsigned long int mpi_limb_t;
+typedef signed long int mpi_limb_signed_t;
+
+struct gcry_mpi {
+	int alloced;		/* array size (# of allocated limbs) */
+	int nlimbs;		/* number of valid limbs */
+	int nbits;		/* the real number of valid bits (info only) */
+	int sign;		/* indicates a negative number */
+	unsigned flags;		/* bit 0: array must be allocated in secure memory space */
+	/* bit 1: not used */
+	/* bit 2: the limb is a pointer to some m_alloced data */
+	mpi_limb_t *d;		/* array with the limbs */
+};
+
+typedef struct gcry_mpi *MPI;
+
+#define MPI_NULL NULL
+
+#define mpi_get_nlimbs(a)     ((a)->nlimbs)
+#define mpi_is_neg(a)	      ((a)->sign)
+
+/*-- mpiutil.c --*/
+MPI mpi_alloc(unsigned nlimbs);
+MPI mpi_alloc_secure(unsigned nlimbs);
+MPI mpi_alloc_like(MPI a);
+void mpi_free(MPI a);
+int mpi_resize(MPI a, unsigned nlimbs);
+int mpi_copy(MPI *copy, const MPI a);
+void mpi_clear(MPI a);
+int mpi_set(MPI w, MPI u);
+int mpi_set_ui(MPI w, ulong u);
+MPI mpi_alloc_set_ui(unsigned long u);
+void mpi_m_check(MPI a);
+void mpi_swap(MPI a, MPI b);
+
+/*-- mpicoder.c --*/
+MPI do_encode_md(const void *sha_buffer, unsigned nbits);
+MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread);
+int mpi_fromstr(MPI val, const char *str);
+u32 mpi_get_keyid(MPI a, u32 *keyid);
+void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign);
+void *mpi_get_secure_buffer(MPI a, unsigned *nbytes, int *sign);
+int mpi_set_buffer(MPI a, const void *buffer, unsigned nbytes, int sign);
+
+#define log_mpidump g10_log_mpidump
+
+/*-- mpi-add.c --*/
+int mpi_add_ui(MPI w, MPI u, ulong v);
+int mpi_add(MPI w, MPI u, MPI v);
+int mpi_addm(MPI w, MPI u, MPI v, MPI m);
+int mpi_sub_ui(MPI w, MPI u, ulong v);
+int mpi_sub(MPI w, MPI u, MPI v);
+int mpi_subm(MPI w, MPI u, MPI v, MPI m);
+
+/*-- mpi-mul.c --*/
+int mpi_mul_ui(MPI w, MPI u, ulong v);
+int mpi_mul_2exp(MPI w, MPI u, ulong cnt);
+int mpi_mul(MPI w, MPI u, MPI v);
+int mpi_mulm(MPI w, MPI u, MPI v, MPI m);
+
+/*-- mpi-div.c --*/
+ulong mpi_fdiv_r_ui(MPI rem, MPI dividend, ulong divisor);
+int mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor);
+int mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor);
+int mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor);
+int mpi_tdiv_r(MPI rem, MPI num, MPI den);
+int mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den);
+int mpi_tdiv_q_2exp(MPI w, MPI u, unsigned count);
+int mpi_divisible_ui(const MPI dividend, ulong divisor);
+
+/*-- mpi-gcd.c --*/
+int mpi_gcd(MPI g, const MPI a, const MPI b);
+
+/*-- mpi-pow.c --*/
+int mpi_pow(MPI w, MPI u, MPI v);
+int mpi_powm(MPI res, MPI base, MPI exp, MPI mod);
+
+/*-- mpi-mpow.c --*/
+int mpi_mulpowm(MPI res, MPI *basearray, MPI *exparray, MPI mod);
+
+/*-- mpi-cmp.c --*/
+int mpi_cmp_ui(MPI u, ulong v);
+int mpi_cmp(MPI u, MPI v);
+
+/*-- mpi-scan.c --*/
+int mpi_getbyte(MPI a, unsigned idx);
+void mpi_putbyte(MPI a, unsigned idx, int value);
+unsigned mpi_trailing_zeros(MPI a);
+
+/*-- mpi-bit.c --*/
+void mpi_normalize(MPI a);
+unsigned mpi_get_nbits(MPI a);
+int mpi_test_bit(MPI a, unsigned n);
+int mpi_set_bit(MPI a, unsigned n);
+int mpi_set_highbit(MPI a, unsigned n);
+void mpi_clear_highbit(MPI a, unsigned n);
+void mpi_clear_bit(MPI a, unsigned n);
+int mpi_rshift(MPI x, MPI a, unsigned n);
+
+/*-- mpi-inv.c --*/
+int mpi_invm(MPI x, MPI u, MPI v);
+
+#endif /*G10_MPI_H */
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
new file mode 100644
index 000000000000..b87487b40a8b
--- /dev/null
+++ b/lib/mpi/longlong.h
@@ -0,0 +1,1478 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+ * Note: I added some stuff for use with gnupg
+ *
+ * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
+ *	2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this file; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA. */
+
+/* You have to define the following before including this file:
+ *
+ * UWtype -- An unsigned type, default type for operations (typically a "word")
+ * UHWtype -- An unsigned type, at least half the size of UWtype.
+ * UDWtype -- An unsigned type, at least twice as large as UWtype.
+ * W_TYPE_SIZE -- size in bits of UWtype
+ *
+ * SItype, USItype -- Signed and unsigned 32 bit types.
+ * DItype, UDItype -- Signed and unsigned 64 bit types.
+ *
+ * On a 32 bit machine UWtype should typically be USItype;
+ * on a 64 bit machine, UWtype should typically be UDItype.
+*/
+
+#define __BITS4 (W_TYPE_SIZE / 4) /* a quarter of the word width, in bits */
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) /* 2^(W_TYPE_SIZE/2): one more than the largest half-word value */
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) /* low half of t */
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) /* high half of t, shifted down to bit 0 */
+
+/* This is used to make sure no undesirable sharing between different libraries
+	that use this file takes place.  */
+#ifndef __MPN
+#define __MPN(x) __##x
+#endif
+
+/* Define auxiliary asm macros.
+ *
+ * 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
+ * UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
+ * word product in HIGH_PROD and LOW_PROD.
+ *
+ * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+ * UDWtype product.  This is just a variant of umul_ppmm.
+
+ * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator) divides a UDWtype, composed by the UWtype integers
+ * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+ * in QUOTIENT and the remainder in REMAINDER.	HIGH_NUMERATOR must be less
+ * than DENOMINATOR for correct operation.  If, in addition, the most
+ * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+ * UDIV_NEEDS_NORMALIZATION is defined to 1.
+ * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
+ * is rounded towards 0.
+ *
+ * 5) count_leading_zeros(count, x) counts the number of zero-bits from the
+ * msb to the first non-zero bit in the UWtype X.  This is the number of
+ * steps X needs to be shifted left to set the msb.  Undefined for X == 0,
+ * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+ *
+ * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+ * from the least significant end.
+ *
+ * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+ * high_addend_2, low_addend_2) adds two two-word UWtype integers, composed
+ * of HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+ * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
+ * (i.e. carry out) is not stored anywhere, and is lost.
+ *
+ * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+ * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+ * composed of HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
+ * LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
+ * and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
+ * and is lost.
+ *
+ * If any of these macros are left undefined for a particular CPU,
+ * C macros are used.  */
+
+/* The CPUs come in alphabetical order below.
+ *
+ * Please add support for more CPUs here, or improve the current support
+ * for the CPUs below!	*/
+
+#if defined(__GNUC__) && !defined(NO_ASM)
+
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+	understood by gcc1.	Use cpp to avoid major code duplication.  */
+#if __GNUC__ < 2
+#define __CLOBBER_CC
+#define __AND_CLOBBER_CC
+#else /* __GNUC__ >= 2 */
+#define __CLOBBER_CC : "cc"
+#define __AND_CLOBBER_CC , "cc"
+#endif /* __GNUC__ < 2 */
+
+/***************************************
+	**************  A29K  *****************
+	***************************************/
+#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add %1,%4,%5\n" \
+		"addc %0,%2,%3" \
+	: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+	: "%r" ((USItype)(ah)), \
+		"rI" ((USItype)(bh)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub %1,%4,%5\n" \
+		"subc %0,%2,%3" \
+	: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+	: "r" ((USItype)(ah)), \
+		"rI" ((USItype)(bh)), \
+		"r" ((USItype)(al)), \
+		"rI" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+		USItype __m0 = (m0), __m1 = (m1); \
+		__asm__ ("multiplu %0,%1,%2" \
+		: "=r" ((USItype)(xl)) \
+		: "r" (__m0), \
+			"r" (__m1)); \
+		__asm__ ("multmu %0,%1,%2" \
+		: "=r" ((USItype)(xh)) \
+		: "r" (__m0), \
+			"r" (__m1)); \
+} while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("dividu %0,%3,%4" \
+	: "=r" ((USItype)(q)), \
+		"=q" ((USItype)(r)) \
+	: "1" ((USItype)(n1)), \
+		"r" ((USItype)(n0)), \
+		"r" ((USItype)(d)))
+
+#define count_leading_zeros(count, x) \
+	__asm__ ("clz %0,%1" \
+	: "=r" ((USItype)(count)) \
+	: "r" ((USItype)(x)))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* __a29k__ */
+
+#if defined(__alpha) && W_TYPE_SIZE == 64
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+		UDItype __m0 = (m0), __m1 = (m1); \
+		__asm__ ("umulh %r1,%2,%0" \
+		: "=r" ((UDItype) ph) \
+		: "%rJ" (__m0), \
+			"rI" (__m1)); \
+		(pl) = __m0 * __m1; \
+	} while (0)
+#define UMUL_TIME 46
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { UDItype __r; \
+	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+	(r) = __r; \
+} while (0)
+extern UDItype __udiv_qrnnd();
+#define UDIV_TIME 220
+#endif /* LONGLONG_STANDALONE */
+#endif /* __alpha */
+
+/***************************************
+	**************  ARM  ******************
+	***************************************/
+#if defined(__arm__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("adds %1, %4, %5\n" \
+		"adc  %0, %2, %3" \
+	: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+	: "%r" ((USItype)(ah)), \
+		"rI" ((USItype)(bh)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subs %1, %4, %5\n" \
+		"sbc  %0, %2, %3" \
+	: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+	: "r" ((USItype)(ah)), \
+		"rI" ((USItype)(bh)), \
+		"r" ((USItype)(al)), \
+		"rI" ((USItype)(bl)))
+#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__
+#define umul_ppmm(xh, xl, a, b) \
+	__asm__ ("%@ Inlined umul_ppmm\n" \
+		"mov	%|r0, %2, lsr #16		@ AAAA\n" \
+		"mov	%|r2, %3, lsr #16		@ BBBB\n" \
+		"bic	%|r1, %2, %|r0, lsl #16		@ aaaa\n" \
+		"bic	%0, %3, %|r2, lsl #16		@ bbbb\n" \
+		"mul	%1, %|r1, %|r2			@ aaaa * BBBB\n" \
+		"mul	%|r2, %|r0, %|r2		@ AAAA * BBBB\n" \
+		"mul	%|r1, %0, %|r1			@ aaaa * bbbb\n" \
+		"mul	%0, %|r0, %0			@ AAAA * bbbb\n" \
+		"adds	%|r0, %1, %0			@ central sum\n" \
+		"addcs	%|r2, %|r2, #65536\n" \
+		"adds	%1, %|r1, %|r0, lsl #16\n" \
+		"adc	%0, %|r2, %|r0, lsr #16" \
+	: "=&r" ((USItype)(xh)), \
+		"=r" ((USItype)(xl)) \
+	: "r" ((USItype)(a)), \
+		"r" ((USItype)(b)) \
+	: "r0", "r1", "r2")
+#else
+#define umul_ppmm(xh, xl, a, b) \
+	__asm__ ("%@ Inlined umul_ppmm\n" \
+		"umull %r1, %r0, %r2, %r3" \
+	: "=&r" ((USItype)(xh)), \
+			"=r" ((USItype)(xl)) \
+	: "r" ((USItype)(a)), \
+			"r" ((USItype)(b)) \
+	: "r0", "r1")
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 100
+#endif /* __arm__ */
+
+/***************************************
+	**************  CLIPPER  **************
+	***************************************/
+#if defined(__clipper__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+	({union {UDItype __ll; \
+		struct {USItype __l, __h; } __i; \
+	} __xx; \
+	__asm__ ("mulwux %2,%0" \
+	: "=r" (__xx.__ll) \
+	: "%0" ((USItype)(u)), \
+		"r" ((USItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define smul_ppmm(w1, w0, u, v) \
+	({union {DItype __ll; \
+		struct {SItype __l, __h; } __i; \
+	} __xx; \
+	__asm__ ("mulwx %2,%0" \
+	: "=r" (__xx.__ll) \
+	: "%0" ((SItype)(u)), \
+		"r" ((SItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+	({UDItype __w; \
+	__asm__ ("mulwux %2,%0" \
+	: "=r" (__w) \
+	: "%0" ((USItype)(u)), \
+		"r" ((USItype)(v))); \
+	__w; })
+#endif /* __clipper__ */
+
+/***************************************
+	**************  GMICRO  ***************
+	***************************************/
+#if defined(__gmicro__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add.w %5,%1\n" \
+		"addx %3,%0" \
+	: "=g" ((USItype)(sh)), \
+		"=&g" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+		"g" ((USItype)(bh)), \
+		"%1" ((USItype)(al)), \
+		"g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub.w %5,%1\n" \
+		"subx %3,%0" \
+	: "=g" ((USItype)(sh)), \
+		"=&g" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+		"g" ((USItype)(bh)), \
+		"1" ((USItype)(al)), \
+		"g" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+	__asm__ ("mulx %3,%0,%1" \
+	: "=g" ((USItype)(ph)), \
+		"=r" ((USItype)(pl)) \
+	: "%0" ((USItype)(m0)), \
+		"g" ((USItype)(m1)))
+#define udiv_qrnnd(q, r, nh, nl, d) \
+	__asm__ ("divx %4,%0,%1" \
+	: "=g" ((USItype)(q)), \
+		"=r" ((USItype)(r)) \
+	: "1" ((USItype)(nh)), \
+		"0" ((USItype)(nl)), \
+		"g" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+	__asm__ ("bsch/1 %1,%0" \
+	: "=g" (count) \
+	: "g" ((USItype)(x)), \
+	     "0" ((USItype)0))
+#endif
+
+/***************************************
+	**************  HPPA  *****************
+	***************************************/
+#if defined(__hppa) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add %4,%5,%1\n" \
+		   "addc %2,%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "%rM" ((USItype)(ah)), \
+	     "rM" ((USItype)(bh)), \
+	     "%rM" ((USItype)(al)), \
+	     "rM" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub %4,%5,%1\n" \
+	   "subb %2,%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "rM" ((USItype)(ah)), \
+	     "rM" ((USItype)(bh)), \
+	     "rM" ((USItype)(al)), \
+	     "rM" ((USItype)(bl)))
+#if defined(_PA_RISC1_1)
+#define umul_ppmm(wh, wl, u, v) \
+do { \
+	union {UDItype __ll; \
+	struct {USItype __h, __l; } __i; \
+	} __xx; \
+	__asm__ ("xmpyu %1,%2,%0" \
+	: "=*f" (__xx.__ll) \
+	: "*f" ((USItype)(u)), \
+	       "*f" ((USItype)(v))); \
+	(wh) = __xx.__i.__h; \
+	(wl) = __xx.__i.__l; \
+} while (0)
+#define UMUL_TIME 8
+#define UDIV_TIME 60
+#else
+#define UMUL_TIME 40
+#define UDIV_TIME 80
+#endif
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { USItype __r; \
+	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+	(r) = __r; \
+} while (0)
+extern USItype __udiv_qrnnd();
+#endif /* LONGLONG_STANDALONE */
+#define count_leading_zeros(count, x) \
+do { \
+	USItype __tmp; \
+	__asm__ ( \
+	"ldi             1,%0\n" \
+	"extru,=	%1,15,16,%%r0  ; Bits 31..16 zero?\n" \
+	"extru,tr	%1,15,16,%1    ; No.  Shift down, skip add.\n" \
+	"ldo		16(%0),%0      ; Yes.	Perform add.\n" \
+	"extru,=	%1,23,8,%%r0   ; Bits 15..8 zero?\n" \
+	"extru,tr	%1,23,8,%1     ; No.  Shift down, skip add.\n" \
+	"ldo		8(%0),%0       ; Yes.	Perform add.\n" \
+	"extru,=	%1,27,4,%%r0   ; Bits 7..4 zero?\n" \
+	"extru,tr	%1,27,4,%1     ; No.  Shift down, skip add.\n" \
+	"ldo		4(%0),%0       ; Yes.	Perform add.\n" \
+	"extru,=	%1,29,2,%%r0   ; Bits 3..2 zero?\n" \
+	"extru,tr	%1,29,2,%1     ; No.  Shift down, skip add.\n" \
+	"ldo		2(%0),%0       ; Yes.	Perform add.\n" \
+	"extru		%1,30,1,%1     ; Extract bit 1.\n" \
+	"sub		%0,%1,%0       ; Subtract it.              " \
+	: "=r" (count), "=r" (__tmp) : "1" (x)); \
+} while (0)
+#endif /* hppa */
+
+/***************************************
+	**************  I370  *****************
+	***************************************/
+#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+	union {UDItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __xx; \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mr %0,%3" \
+	: "=r" (__xx.__i.__h), \
+	       "=r" (__xx.__i.__l) \
+	: "%1" (__m0), \
+	       "r" (__m1)); \
+	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+	(xh) += ((((SItype) __m0 >> 31) & __m1) \
+	     + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define smul_ppmm(xh, xl, m0, m1) \
+do { \
+	union {DItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __xx; \
+	__asm__ ("mr %0,%3" \
+	: "=r" (__xx.__i.__h), \
+	       "=r" (__xx.__i.__l) \
+	: "%1" (m0), \
+	       "r" (m1)); \
+	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+} while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+do { \
+	union {DItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __xx; \
+	__xx.__i.__h = n1; __xx.__i.__l = n0; \
+	__asm__ ("dr %0,%2" \
+	: "=r" (__xx.__ll) \
+	: "0" (__xx.__ll), "r" (d)); \
+	(q) = __xx.__i.__l; (r) = __xx.__i.__h; \
+} while (0)
+#endif
+
+/***************************************
+	**************  I386  *****************
+	***************************************/
+#undef __i386__ /* NOTE(review): makes the test below depend on __i486__ alone, and __i386__ stays undefined for whatever follows this header in the translation unit -- confirm this is intended */
+#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addl %5,%1\n" \
+	   "adcl %3,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	     "g" ((USItype)(bh)), \
+	     "%1" ((USItype)(al)), \
+	     "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subl %5,%1\n" \
+	   "sbbl %3,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	     "g" ((USItype)(bh)), \
+	     "1" ((USItype)(al)), \
+	     "g" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("mull %3" \
+	: "=a" ((USItype)(w0)), \
+	     "=d" ((USItype)(w1)) \
+	: "%0" ((USItype)(u)), \
+	     "rm" ((USItype)(v)))
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("divl %4" \
+	: "=a" ((USItype)(q)), \
+	     "=d" ((USItype)(r)) \
+	: "0" ((USItype)(n0)), \
+	     "1" ((USItype)(n1)), \
+	     "rm" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+do { \
+	USItype __cbtmp; \
+	__asm__ ("bsrl %1,%0" \
+	: "=r" (__cbtmp) : "rm" ((USItype)(x))); \
+	(count) = __cbtmp ^ 31; \
+} while (0)
+#define count_trailing_zeros(count, x) \
+	__asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
+#ifndef UMUL_TIME
+#define UMUL_TIME 40
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 40
+#endif
+#endif /* 80x86 */
+
+/***************************************
+	**************  I860  *****************
+	***************************************/
+#if defined(__i860__) && W_TYPE_SIZE == 32
+#define rshift_rhlc(r, h, l, c) /* writes r from h, l and count c; presumably the low word of (h:l) >> c -- confirm */ \
+	__asm__ ("shr %3,r0,r0\n" \
+	"shrd %1,%2,%0" \
+	: "=r" (r) : "r" (h), "r" (l), "rn" (c)) /* the leading ':' separates the template from the output list */
+#endif /* i860 */
+
+/***************************************
+	**************  I960  *****************
+	***************************************/
+#if defined(__i960__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("cmpo 1,0\n" \
+	"addc %5,%4,%1\n" \
+	"addc %3,%2,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "%dI" ((USItype)(ah)), \
+	     "dI" ((USItype)(bh)), \
+	     "%dI" ((USItype)(al)), \
+	     "dI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("cmpo 0,0\n" \
+	"subc %5,%4,%1\n" \
+	"subc %3,%2,%0" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "dI" ((USItype)(ah)), \
+	     "dI" ((USItype)(bh)), \
+	     "dI" ((USItype)(al)), \
+	     "dI" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+	({union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __xx; \
+	__asm__ ("emul        %2,%1,%0" \
+	: "=d" (__xx.__ll) \
+	: "%dI" ((USItype)(u)), \
+	     "dI" ((USItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+	({UDItype __w; \
+	__asm__ ("emul      %2,%1,%0" \
+	: "=d" (__w) \
+	: "%dI" ((USItype)(u)), \
+	       "dI" ((USItype)(v))); \
+	__w; })
+#define udiv_qrnnd(q, r, nh, nl, d) /* divides the two-word value (nh:nl) by d: quotient -> q, remainder -> r */ \
+do { \
+	union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __nn, __rq; /* __nn: packed dividend; __rq: result, remainder in __l and quotient in __h */ \
+	__nn.__i.__h = (nh); __nn.__i.__l = (nl); \
+	__asm__ ("ediv %2,%1,%0" /* NOTE(review): was "%d,%n,%0"; operand order mirrors the emul macros above -- confirm */ \
+	: "=d" (__rq.__ll) \
+	: "dI" (__nn.__ll), \
+	     "dI" ((USItype)(d))); \
+	(r) = __rq.__i.__l; (q) = __rq.__i.__h; \
+} while (0)
+#define count_leading_zeros(count, x) \
+do { \
+	USItype __cbtmp; \
+	__asm__ ("scanbit %1,%0" \
+	: "=r" (__cbtmp) \
+	: "r" ((USItype)(x))); \
+	(count) = __cbtmp ^ 31; \
+} while (0)
+#define COUNT_LEADING_ZEROS_0 (-32)	/* sic */
+#if defined(__i960mx)		/* what is the proper symbol to test??? */
+#define rshift_rhlc(r, h, l, c) /* writes r from the pair (h:l) and count c via "shre" */ \
+do { \
+	union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __nn; \
+	__nn.__i.__h = (h); __nn.__i.__l = (l); /* pack h:l into one UDItype operand */ \
+	__asm__ ("shre %2,%1,%0" \
+	: "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
+} while (0)
+#endif /* i960mx */
+#endif /* i960 */
+
+/***************************************
+	**************  68000	****************
+	***************************************/
+#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add%.l %5,%1\n" \
+	   "addx%.l %3,%0" \
+	: "=d" ((USItype)(sh)), \
+	     "=&d" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	     "d" ((USItype)(bh)), \
+	     "%1" ((USItype)(al)), \
+	     "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub%.l %5,%1\n" \
+	   "subx%.l %3,%0" \
+	: "=d" ((USItype)(sh)), \
+	     "=&d" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	     "d" ((USItype)(bh)), \
+	     "1" ((USItype)(al)), \
+	     "g" ((USItype)(bl)))
+#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020))
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("mulu%.l %3,%1:%0" \
+	: "=d" ((USItype)(w0)), \
+	     "=d" ((USItype)(w1)) \
+	: "%0" ((USItype)(u)), \
+	     "dmi" ((USItype)(v)))
+#define UMUL_TIME 45
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("divu%.l %4,%1:%0" \
+	: "=d" ((USItype)(q)), \
+	     "=d" ((USItype)(r)) \
+	: "0" ((USItype)(n0)), \
+	     "1" ((USItype)(n1)), \
+	     "dmi" ((USItype)(d)))
+#define UDIV_TIME 90
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("divs%.l %4,%1:%0" \
+	: "=d" ((USItype)(q)), \
+	     "=d" ((USItype)(r)) \
+	: "0" ((USItype)(n0)), \
+	     "1" ((USItype)(n1)), \
+	     "dmi" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+	__asm__ ("bfffo %1{%b2:%b2},%0" \
+	: "=d" ((USItype)(count)) \
+	: "od" ((USItype)(x)), "n" (0))
+#define COUNT_LEADING_ZEROS_0 32
+#else /* not mc68020 */
+#define umul_ppmm(xh, xl, a, b) \
+do { USItype __umul_tmp1, __umul_tmp2; \
+	__asm__ ("| Inlined umul_ppmm\n" \
+	"move%.l %5,%3\n" \
+	"move%.l %2,%0\n" \
+	"move%.w %3,%1\n" \
+	"swap	%3\n" \
+	"swap	%0\n" \
+	"mulu	%2,%1\n" \
+	"mulu	%3,%0\n" \
+	"mulu	%2,%3\n" \
+	"swap	%2\n" \
+	"mulu	%5,%2\n" \
+	"add%.l	%3,%2\n" \
+	"jcc	1f\n" \
+	"add%.l	%#0x10000,%0\n" \
+	"1:	move%.l %2,%3\n" \
+	"clr%.w	%2\n" \
+	"swap	%2\n" \
+	"swap	%3\n" \
+	"clr%.w	%3\n" \
+	"add%.l	%3,%1\n" \
+	"addx%.l %2,%0\n" \
+	"| End inlined umul_ppmm" \
+	: "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
+		"=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
+	: "%2" ((USItype)(a)), "d" ((USItype)(b))); \
+} while (0)
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+#endif /* not mc68020 */
+#endif /* mc68000 */
+
+/***************************************
+	**************  88000	****************
+	***************************************/
+#if defined(__m88000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addu.co %1,%r4,%r5\n" \
+	   "addu.ci %0,%r2,%r3" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "%rJ" ((USItype)(ah)), \
+	     "rJ" ((USItype)(bh)), \
+	     "%rJ" ((USItype)(al)), \
+	     "rJ" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subu.co %1,%r4,%r5\n" \
+	   "subu.ci %0,%r2,%r3" \
+	: "=r" ((USItype)(sh)), \
+	     "=&r" ((USItype)(sl)) \
+	: "rJ" ((USItype)(ah)), \
+	     "rJ" ((USItype)(bh)), \
+	     "rJ" ((USItype)(al)), \
+	     "rJ" ((USItype)(bl)))
+#define count_leading_zeros(count, x) \
+do { \
+	USItype __cbtmp; \
+	__asm__ ("ff1 %0,%1" \
+	: "=r" (__cbtmp) \
+	: "r" ((USItype)(x))); \
+	(count) = __cbtmp ^ 31; \
+} while (0)
+#define COUNT_LEADING_ZEROS_0 63	/* sic */
+#if defined(__m88110__)
+#define umul_ppmm(wh, wl, u, v) \
+do { \
+	union {UDItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __x; \
+	__asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
+	(wh) = __x.__i.__h; \
+	(wl) = __x.__i.__l; \
+} while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) /* divides the two-word value (n1:n0) by d: quotient -> q, remainder -> r */ \
+	({union {UDItype __ll; \
+	   struct {USItype __h, __l; } __i; \
+	} __x, __q; \
+	__x.__i.__h = (n1); __x.__i.__l = (n0); \
+	__asm__ ("divu.d %0,%1,%2" \
+	: "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
+	(r) = (n0) - __q.__i.__l * (d); (q) = __q.__i.__l; }) /* low word is reached through __i; r = n0 - q*d */
+#define UMUL_TIME 5
+#define UDIV_TIME 25
+#else
+#define UMUL_TIME 17
+#define UDIV_TIME 150
+#endif /* __m88110__ */
+#endif /* __m88000__ */
+
+/***************************************
+	**************  MIPS  *****************
+	***************************************/
+#if defined(__mips__) && W_TYPE_SIZE == 32
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("multu %2,%3" \
+	: "=l" ((USItype)(w0)), \
+	     "=h" ((USItype)(w1)) \
+	: "d" ((USItype)(u)), \
+	     "d" ((USItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("multu %2,%3\n" \
+	   "mflo %0\n" \
+	   "mfhi %1" \
+	: "=d" ((USItype)(w0)), \
+	     "=d" ((USItype)(w1)) \
+	: "d" ((USItype)(u)), \
+	     "d" ((USItype)(v)))
+#endif
+#define UMUL_TIME 10
+#define UDIV_TIME 100
+#endif /* __mips__ */
+
+/***************************************
+	**************  MIPS/64  **************
+	***************************************/
+#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("dmultu %2,%3" \
+	: "=l" ((UDItype)(w0)), \
+	     "=h" ((UDItype)(w1)) \
+	: "d" ((UDItype)(u)), \
+	     "d" ((UDItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("dmultu %2,%3\n" \
+	   "mflo %0\n" \
+	   "mfhi %1" \
+	: "=d" ((UDItype)(w0)), \
+	     "=d" ((UDItype)(w1)) \
+	: "d" ((UDItype)(u)), \
+	     "d" ((UDItype)(v)))
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 140
+#endif /* __mips__ */
+
+/***************************************
+	**************  32000	****************
+	***************************************/
+#if defined(__ns32000__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+	({union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __xx; \
+	__asm__ ("meid %2,%0" \
+	: "=g" (__xx.__ll) \
+	: "%0" ((USItype)(u)), \
+	     "g" ((USItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+	({UDItype __w; \
+	__asm__ ("meid %2,%0" \
+	: "=g" (__w) \
+	: "%0" ((USItype)(u)), \
+	       "g" ((USItype)(v))); \
+	__w; })
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	({union {UDItype __ll; \
+	   struct {USItype __l, __h; } __i; \
+	} __xx; \
+	__xx.__i.__h = (n1); __xx.__i.__l = (n0); \
+	__asm__ ("deid %2,%0" \
+	: "=g" (__xx.__ll) \
+	: "0" (__xx.__ll), \
+	     "g" ((USItype)(d))); \
+	(r) = __xx.__i.__l; (q) = __xx.__i.__h; })
+#define count_trailing_zeros(count, x) \
+do { \
+	__asm__("ffsd      %2,%0" \
+	: "=r"((USItype) (count)) \
+	: "0"((USItype) 0), "r"((USItype) (x))); \
+	} while (0)
+#endif /* __ns32000__ */
+
+/***************************************
+	**************  PPC  ******************
+	***************************************/
+#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+	if (__builtin_constant_p(bh) && (bh) == 0) \
+		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "%r" ((USItype)(ah)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl))); \
+	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
+		__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "%r" ((USItype)(ah)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl))); \
+	else \
+		__asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "%r" ((USItype)(ah)), \
+		"r" ((USItype)(bh)), \
+		"%r" ((USItype)(al)), \
+		"rI" ((USItype)(bl))); \
+} while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+	if (__builtin_constant_p(ah) && (ah) == 0) \
+		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(bh)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+	else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \
+		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(bh)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+	else if (__builtin_constant_p(bh) && (bh) == 0) \
+		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(ah)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+	else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
+		__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(ah)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+	else \
+		__asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
+		: "=r" ((USItype)(sh)), \
+		"=&r" ((USItype)(sl)) \
+		: "r" ((USItype)(ah)), \
+		"r" ((USItype)(bh)), \
+		"rI" ((USItype)(al)), \
+		"r" ((USItype)(bl))); \
+} while (0)
+#define count_leading_zeros(count, x) \
+	__asm__ ("{cntlz|cntlzw} %0,%1" \
+	: "=r" ((USItype)(count)) \
+	: "r" ((USItype)(x)))
+#define COUNT_LEADING_ZEROS_0 32
+#if defined(_ARCH_PPC)
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mulhwu %0,%1,%2" \
+	: "=r" ((USItype) ph) \
+	: "%r" (__m0), \
+	"r" (__m1)); \
+	(pl) = __m0 * __m1; \
+} while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+do { \
+	SItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mulhw %0,%1,%2" \
+	: "=r" ((SItype) ph) \
+	: "%r" (__m0), \
+	"r" (__m1)); \
+	(pl) = __m0 * __m1; \
+} while (0)
+#define SMUL_TIME 14
+#define UDIV_TIME 120
+#else
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mul %0,%2,%3" \
+	: "=r" ((USItype)(xh)), \
+	"=q" ((USItype)(xl)) \
+	: "r" (__m0), \
+	"r" (__m1)); \
+	(xh) += ((((SItype) __m0 >> 31) & __m1) \
+	+ (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define UMUL_TIME 8
+#define smul_ppmm(xh, xl, m0, m1) \
+	__asm__ ("mul %0,%2,%3" \
+	: "=r" ((SItype)(xh)), \
+	"=q" ((SItype)(xl)) \
+	: "r" (m0), \
+	"r" (m1))
+#define SMUL_TIME 4
+#define sdiv_qrnnd(q, r, nh, nl, d) \
+	__asm__ ("div %0,%2,%4" \
+	: "=r" ((SItype)(q)), "=q" ((SItype)(r)) \
+	: "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d)))
+#define UDIV_TIME 100
+#endif
+#endif /* Power architecture variants.  */
+
+/***************************************
+	**************  PYR  ******************
+	***************************************/
+#if defined(__pyr__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addw        %5,%1\n" \
+	"addwc	%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	"g" ((USItype)(bh)), \
+	"%1" ((USItype)(al)), \
+	"g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subw        %5,%1\n" \
+	"subwb	%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	"g" ((USItype)(bh)), \
+	"1" ((USItype)(al)), \
+	"g" ((USItype)(bl)))
+	/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  */
+#define umul_ppmm(w1, w0, u, v) \
+	({union {UDItype __ll; \
+	struct {USItype __h, __l; } __i; \
+	} __xx; \
+	__asm__ ("movw %1,%R0\n" \
+	"uemul %2,%0" \
+	: "=&r" (__xx.__ll) \
+	: "g" ((USItype) (u)), \
+	"g" ((USItype)(v))); \
+	(w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#endif /* __pyr__ */
+
+/***************************************
+	**************  RT/ROMP  **************
+	***************************************/
+#if defined(__ibm032__) /* RT/ROMP */	&& W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("a %1,%5\n" \
+	"ae %0,%3" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	"r" ((USItype)(bh)), \
+	"%1" ((USItype)(al)), \
+	"r" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("s %1,%5\n" \
+	"se %0,%3" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	"r" ((USItype)(bh)), \
+	"1" ((USItype)(al)), \
+	"r" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ( \
+	"s       r2,r2\n" \
+	"mts	r10,%2\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"m	r2,%3\n" \
+	"cas	%0,r2,r0\n" \
+	"mfs	r10,%1" \
+	: "=r" ((USItype)(ph)), \
+	"=r" ((USItype)(pl)) \
+	: "%r" (__m0), \
+	"r" (__m1) \
+	: "r2"); \
+	(ph) += ((((SItype) __m0 >> 31) & __m1) \
+	+ (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define UMUL_TIME 20
+#define UDIV_TIME 200
+#define count_leading_zeros(count, x) \
+do { \
+	if ((x) >= 0x10000) \
+		__asm__ ("clz     %0,%1" \
+		: "=r" ((USItype)(count)) \
+		: "r" ((USItype)(x) >> 16)); \
+	else { \
+		__asm__ ("clz   %0,%1" \
+		: "=r" ((USItype)(count)) \
+		: "r" ((USItype)(x))); \
+		(count) += 16; \
+	} \
+} while (0)
+#endif /* RT/ROMP */
+
+/***************************************
+	**************  SH2  ******************
+	***************************************/
+#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \
+	&& W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ( \
+	"dmulu.l %2,%3\n" \
+	"sts	macl,%1\n" \
+	"sts	mach,%0" \
+	: "=r" ((USItype)(w1)), \
+	"=r" ((USItype)(w0)) \
+	: "r" ((USItype)(u)), \
+	"r" ((USItype)(v)) \
+	: "macl", "mach")
+#define UMUL_TIME 5
+#endif
+
+/***************************************
+	**************  SPARC	****************
+	***************************************/
+#if defined(__sparc__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addcc %r4,%5,%1\n" \
+	"addx %r2,%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "%rJ" ((USItype)(ah)), \
+	"rI" ((USItype)(bh)), \
+	"%rJ" ((USItype)(al)), \
+	"rI" ((USItype)(bl)) \
+	__CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subcc %r4,%5,%1\n" \
+	"subx %r2,%3,%0" \
+	: "=r" ((USItype)(sh)), \
+	"=&r" ((USItype)(sl)) \
+	: "rJ" ((USItype)(ah)), \
+	"rI" ((USItype)(bh)), \
+	"rJ" ((USItype)(al)), \
+	"rI" ((USItype)(bl)) \
+	__CLOBBER_CC)
+#if defined(__sparc_v8__)
+/* Don't match immediate range because, 1) it is not often useful,
+	2) the 'I' flag thinks of the range as a 13 bit signed interval,
+	while we want to match a 13 bit interval, sign extended to 32 bits,
+	but INTERPRETED AS UNSIGNED.  */
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
+	: "=r" ((USItype)(w1)), \
+	"=r" ((USItype)(w0)) \
+	: "r" ((USItype)(u)), \
+	"r" ((USItype)(v)))
+#define UMUL_TIME 5
+#ifndef SUPERSPARC		/* SuperSPARC's udiv only handles 53 bit dividends */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { \
+	USItype __q; \
+	__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+	: "=r" ((USItype)(__q)) \
+	: "r" ((USItype)(n1)), \
+	"r" ((USItype)(n0)), \
+	"r" ((USItype)(d))); \
+	(r) = (n0) - __q * (d); \
+	(q) = __q; \
+} while (0)
+#define UDIV_TIME 25
+#endif /* SUPERSPARC */
+#else /* ! __sparc_v8__ */
+#if defined(__sparclite__)
+/* This has hardware multiply but not divide.  It also has two additional
+	instructions scan (ffs from high bit) and divscc.  */
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("umul %2,%3,%1;rd %%y,%0" \
+	: "=r" ((USItype)(w1)), \
+	"=r" ((USItype)(w0)) \
+	: "r" ((USItype)(u)), \
+	"r" ((USItype)(v)))
+#define UMUL_TIME 5
+#define udiv_qrnnd(q, r, n1, n0, d) \
+	__asm__ ("! Inlined udiv_qrnnd\n" \
+	"wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n" \
+	"tst	%%g0\n" \
+	"divscc	%3,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%%g1\n" \
+	"divscc	%%g1,%4,%0\n" \
+	"rd	%%y,%1\n" \
+	"bl,a 1f\n" \
+	"add	%1,%4,%1\n" \
+	"1:	! End of inline udiv_qrnnd" \
+	: "=r" ((USItype)(q)), \
+	"=r" ((USItype)(r)) \
+	: "r" ((USItype)(n1)), \
+	"r" ((USItype)(n0)), \
+	"rI" ((USItype)(d)) \
+	: "%g1" __AND_CLOBBER_CC)
+#define UDIV_TIME 37
+#define count_leading_zeros(count, x) /* output: count, input: x */ \
+	__asm__ ("scan %1,0,%0" \
+	: "=r" ((USItype)(count)) \
+	: "r" ((USItype)(x)))
+/* Early sparclites return 63 for an argument of 0, but they warn that future
+	implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
+	undefined.  */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+	/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
+#ifndef umul_ppmm
+#define umul_ppmm(w1, w0, u, v) \
+	__asm__ ("! Inlined umul_ppmm\n" \
+	"wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n" \
+	"sra	%3,31,%%g2	! Don't move this insn\n" \
+	"and	%2,%%g2,%%g2	! Don't move this insn\n" \
+	"andcc	%%g0,0,%%g1	! Don't move this insn\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,%3,%%g1\n" \
+	"mulscc	%%g1,0,%%g1\n" \
+	"add	%%g1,%%g2,%0\n" \
+	"rd	%%y,%1" \
+	: "=r" ((USItype)(w1)), \
+	"=r" ((USItype)(w0)) \
+	: "%rI" ((USItype)(u)), \
+	"r" ((USItype)(v)) \
+	: "%g1", "%g2" __AND_CLOBBER_CC)
+#define UMUL_TIME 39		/* 39 instructions */
+#endif
+#ifndef udiv_qrnnd
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { USItype __r; \
+	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+	(r) = __r; \
+} while (0)
+	extern USItype __udiv_qrnnd();
+#define UDIV_TIME 140
+#endif /* LONGLONG_STANDALONE */
+#endif /* udiv_qrnnd */
+#endif /* __sparc__ */
+
+/***************************************
+	**************  VAX  ******************
+	***************************************/
+#if defined(__vax__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("addl2 %5,%1\n" \
+	"adwc %3,%0" \
+	: "=g" ((USItype)(sh)), \
+	"=&g" ((USItype)(sl)) \
+	: "%0" ((USItype)(ah)), \
+	"g" ((USItype)(bh)), \
+	"%1" ((USItype)(al)), \
+	"g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("subl2 %5,%1\n" \
+	"sbwc %3,%0" \
+	: "=g" ((USItype)(sh)), \
+	"=&g" ((USItype)(sl)) \
+	: "0" ((USItype)(ah)), \
+	"g" ((USItype)(bh)), \
+	"1" ((USItype)(al)), \
+	"g" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+	union {UDItype __ll; \
+	struct {USItype __l, __h; } __i; \
+	} __xx; \
+	USItype __m0 = (m0), __m1 = (m1); \
+	__asm__ ("emul %1,%2,$0,%0" \
+	: "=g" (__xx.__ll) \
+	: "g" (__m0), \
+	"g" (__m1)); \
+	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+	(xh) += ((((SItype) __m0 >> 31) & __m1) \
+	+ (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+do { \
+	union {DItype __ll; \
+	struct {SItype __l, __h; } __i; \
+	} __xx; \
+	__xx.__i.__h = n1; __xx.__i.__l = n0; \
+	__asm__ ("ediv %3,%2,%0,%1" \
+	: "=g" (q), "=g" (r) \
+	: "g" (__xx.__ll), "g" (d)); \
+} while (0)
+#endif /* __vax__ */
+
+/***************************************
+	**************  Z8000	****************
+	***************************************/
+#if defined(__z8000__) && W_TYPE_SIZE == 16
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+	__asm__ ("add %H1,%H5\n\tadc  %H0,%H3" \
+	: "=r" ((unsigned int)(sh)), \
+	"=&r" ((unsigned int)(sl)) \
+	: "%0" ((unsigned int)(ah)), \
+	"r" ((unsigned int)(bh)), \
+	"%1" ((unsigned int)(al)), \
+	"rQR" ((unsigned int)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+	__asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3" \
+	: "=r" ((unsigned int)(sh)), \
+	"=&r" ((unsigned int)(sl)) \
+	: "0" ((unsigned int)(ah)), \
+	"r" ((unsigned int)(bh)), \
+	"1" ((unsigned int)(al)), \
+	"rQR" ((unsigned int)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+	union {long int __ll; \
+	struct {unsigned int __h, __l; } __i; \
+	} __xx; \
+	unsigned int __m0 = (m0), __m1 = (m1); \
+	__asm__ ("mult      %S0,%H3" \
+	: "=r" (__xx.__i.__h), \
+	"=r" (__xx.__i.__l) \
+	: "%1" (__m0), \
+	"rQR" (__m1)); \
+	(xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+	(xh) += ((((signed int) __m0 >> 15) & __m1) \
+	+ (((signed int) __m1 >> 15) & __m0)); \
+} while (0)
+#endif /* __z8000__ */
+
+#endif /* __GNUC__ */
+
+/***************************************
+	***********  Generic Versions	********
+	***************************************/
+#if !defined(umul_ppmm) && defined(__umulsidi3)
+#define umul_ppmm(ph, pl, m0, m1) \
+{ \
+	UDWtype __ll = __umulsidi3(m0, m1); \
+	ph = (UWtype) (__ll >> W_TYPE_SIZE); \
+	pl = (UWtype) __ll; \
+}
+#endif
+
+#if !defined(__umulsidi3)
+#define __umulsidi3(u, v) \
+	({UWtype __hi, __lo; \
+	umul_ppmm(__hi, __lo, u, v); \
+	((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
+#endif
+
+	/* If this machine has no inline assembler, use C macros.  */
+
+#if !defined(add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) + (bh,bl) */ \
+do { \
+	UWtype __x; \
+	__x = (al) + (bl); \
+	(sh) = (ah) + (bh) + (__x < (al)); /* __x < al: low word wrapped, carry 1 */ \
+	(sl) = __x; \
+} while (0)
+#endif
+
+#if !defined(sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) /* (sh,sl) = (ah,al) - (bh,bl) */ \
+do { \
+	UWtype __x; \
+	__x = (al) - (bl); \
+	(sh) = (ah) - (bh) - (__x > (al)); /* __x > al: low word wrapped, borrow 1 */ \
+	(sl) = __x; \
+} while (0)
+#endif
+
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+do { \
+	UWtype __x0, __x1, __x2, __x3; \
+	UHWtype __ul, __vl, __uh, __vh; \
+	UWtype __u = (u), __v = (v); \
+	\
+	__ul = __ll_lowpart(__u); \
+	__uh = __ll_highpart(__u); \
+	__vl = __ll_lowpart(__v); \
+	__vh = __ll_highpart(__v); \
+	\
+	__x0 = (UWtype) __ul * __vl; \
+	__x1 = (UWtype) __ul * __vh; \
+	__x2 = (UWtype) __uh * __vl; \
+	__x3 = (UWtype) __uh * __vh; \
+	\
+	__x1 += __ll_highpart(__x0);/* this can't give carry */ \
+	__x1 += __x2;		/* but this indeed can */ \
+	if (__x1 < __x2)		/* did we get it? */ \
+	__x3 += __ll_B;		/* yes, add it in the proper pos. */ \
+	\
+	(w1) = __x3 + __ll_highpart(__x1); \
+	(w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \
+} while (0)
+#endif
+
+#if !defined(smul_ppmm)	/* signed product, derived from umul_ppmm */
+#define smul_ppmm(w1, w0, u, v) \
+do { \
+	UWtype __w1; \
+	UWtype __m0 = (u), __m1 = (v); \
+	umul_ppmm(__w1, w0, __m0, __m1); \
+	(w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) /* u < 0: drop v */ \
+	- (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); /* v < 0: drop u */ \
+} while (0)
+#endif
+
+	/* Define this unconditionally, so it can be used for debugging.  */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+do { \
+	UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
+	__d1 = __ll_highpart(d); \
+	__d0 = __ll_lowpart(d); \
+	\
+	__r1 = (n1) % __d1; \
+	__q1 = (n1) / __d1; \
+	__m = (UWtype) __q1 * __d0; \
+	__r1 = __r1 * __ll_B | __ll_highpart(n0); \
+	if (__r1 < __m) { \
+		__q1--, __r1 += (d); \
+		if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
+		if (__r1 < __m) \
+			__q1--, __r1 += (d); \
+	} \
+	__r1 -= __m; \
+	\
+	__r0 = __r1 % __d1; \
+	__q0 = __r1 / __d1; \
+	__m = (UWtype) __q0 * __d0; \
+	__r0 = __r0 * __ll_B | __ll_lowpart(n0); \
+	if (__r0 < __m) { \
+		__q0--, __r0 += (d); \
+		if (__r0 >= (d)) \
+			if (__r0 < __m) \
+				__q0--, __r0 += (d); \
+	} \
+	__r0 -= __m; \
+	\
+	(q) = (UWtype) __q1 * __ll_B | __q0; \
+	(r) = __r0; \
+} while (0)
+
+/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
+	__udiv_w_sdiv (defined in libgcc or elsewhere).  */
+#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+do { \
+	UWtype __r; \
+	(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
+	(r) = __r; \
+} while (0)
+#endif
+
+	/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
+#if !defined(udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+#undef count_leading_zeros
+#if !defined(count_leading_zeros)
+	extern
+#ifdef __STDC__
+			const
+#endif
+			unsigned char __clz_tab[];
+#define count_leading_zeros(count, x) \
+do { \
+	UWtype __xr = (x); \
+	UWtype __a; \
+	\
+	if (W_TYPE_SIZE <= 32) { \
+		__a = __xr < ((UWtype) 1 << 2*__BITS4) \
+		? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \
+		: (__xr < ((UWtype) 1 << 3*__BITS4) ?  2*__BITS4 : 3*__BITS4); \
+	} \
+	else { \
+		for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
+			if (((__xr >> __a) & 0xff) != 0) \
+				break; \
+	} \
+	\
+	(count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
+} while (0)
+	/* This version gives a well-defined value for zero. */
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif
+
+#if !defined(count_trailing_zeros)
+/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
+	defined in asm, but if it is not, the C version above is good enough.  */
+#define count_trailing_zeros(count, x) \
+do { \
+	UWtype __ctz_x = (x); \
+	UWtype __ctz_c; \
+	count_leading_zeros(__ctz_c, __ctz_x & -__ctz_x); \
+	(count) = W_TYPE_SIZE - 1 - __ctz_c; \
+} while (0)
+#endif
+
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
diff --git a/lib/mpi/mpi-inline.h b/lib/mpi/mpi-inline.h
new file mode 100644
index 000000000000..e2b39852b30a
--- /dev/null
+++ b/lib/mpi/mpi-inline.h
@@ -0,0 +1,122 @@
+/* mpi-inline.h  -  Internal to the Multi Precision Integers
+ *	Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#ifndef G10_MPI_INLINE_H
+#define G10_MPI_INLINE_H
+
+#ifndef G10_MPI_INLINE_DECL
+#define G10_MPI_INLINE_DECL  extern inline
+#endif
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+	      mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+	mpi_limb_t x;	/* limb of s1 currently being processed */
+
+	x = *s1_ptr++;
+	s2_limb += x;
+	*res_ptr++ = s2_limb;
+	if (s2_limb < x) {	/* sum is less than the left operand: handle carry */
+		while (--s1_size) {
+			x = *s1_ptr++ + 1;	/* add carry */
+			*res_ptr++ = x;	/* and store */
+			if (x)	/* not 0 (no overflow): we can stop */
+				goto leave;
+		}
+		return 1;	/* return carry (s1 ran out of limbs) */
+	}
+
+leave:
+	if (res_ptr != s1_ptr) {	/* not operating in place */
+		mpi_size_t i;	/* copy the remaining s1_size - 1 limbs */
+		for (i = 0; i < s1_size - 1; i++)
+			res_ptr[i] = s1_ptr[i];
+	}
+	return 0;		/* no carry */
+}
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+	    mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+	mpi_size_t tail = s1_size - s2_size;	/* s1 limbs past s2's length */
+	mpi_limb_t carry = 0;
+
+	if (s2_size != 0)	/* add across the limbs both operands have */
+		carry = mpihelp_add_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+	if (tail != 0)		/* feed the carry into the rest of s1 */
+		carry = mpihelp_add_1(res_ptr + s2_size, s1_ptr + s2_size,
+				      tail, carry);
+	return carry;
+}
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+	      mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+	mpi_limb_t x;	/* limb of s1 currently being processed */
+
+	x = *s1_ptr++;
+	s2_limb = x - s2_limb;
+	*res_ptr++ = s2_limb;
+	if (s2_limb > x) {	/* difference wrapped around: handle borrow */
+		while (--s1_size) {
+			x = *s1_ptr++;
+			*res_ptr++ = x - 1;	/* subtract borrow and store */
+			if (x)	/* limb was not 0 (no wrap): we can stop */
+				goto leave;
+		}
+		return 1;	/* return borrow (s1 ran out of limbs) */
+	}
+
+leave:
+	if (res_ptr != s1_ptr) {	/* not operating in place */
+		mpi_size_t i;	/* copy the remaining s1_size - 1 limbs */
+		for (i = 0; i < s1_size - 1; i++)
+			res_ptr[i] = s1_ptr[i];
+	}
+	return 0;		/* no borrow */
+}
+
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+	    mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+	mpi_size_t tail = s1_size - s2_size;	/* s1 limbs past s2's length */
+	mpi_limb_t borrow = 0;
+
+	if (s2_size != 0)	/* subtract across the limbs both operands have */
+		borrow = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+	if (tail != 0)		/* feed the borrow into the rest of s1 */
+		borrow = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size,
+				       tail, borrow);
+	return borrow;
+}
+
+#endif /*G10_MPI_INLINE_H */
diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h
new file mode 100644
index 000000000000..77adcf6bc257
--- /dev/null
+++ b/lib/mpi/mpi-internal.h
@@ -0,0 +1,261 @@
+/* mpi-internal.h  -  Internal to the Multi Precision Integers
+ *	Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *	Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+#ifndef G10_MPI_INTERNAL_H
+#define G10_MPI_INTERNAL_H
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/mpi.h>
+#include <linux/errno.h>
+
+#define log_debug printk
+#define log_bug printk
+
+#define assert(x) \
+	do { \
+		if (!(x))	/* parenthesised: x may be a comparison */ \
+			log_bug("failed assertion\n"); \
+	} while (0);	/* NOTE(review): trailing ';' kept for existing callers */
+
+/* If KARATSUBA_THRESHOLD is not already defined, define it to a
+ * value which is good on most machines.  */
+
+/* tested 4, 16, 32 and 64, where 16 gave the best performance when
+ * checking a 768 and a 1024 bit ElGamal signature.
+ * (wk 22.12.97) */
+#ifndef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 16
+#endif
+
+/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
+#if KARATSUBA_THRESHOLD < 2
+#undef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 2
+#endif
+
+typedef mpi_limb_t *mpi_ptr_t;	/* pointer to a limb */
+typedef int mpi_size_t;		/* (must be a signed type) */
+
+#define ABS(x) ((x) >= 0 ? (x) : -(x))	/* note: x is evaluated twice */
+#define MIN(l, o) ((l) < (o) ? (l) : (o))
+#define MAX(h, i) ((h) > (i) ? (h) : (i))
+
+static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
+{
+	if (!(a->alloced < b))	/* already holds at least b: nothing to do */
+		return 0;
+	return mpi_resize(a, b);
+}
+
+/* Copy N limbs from S to D.  */
+#define MPN_COPY(d, s, n) \
+	do {					\
+		mpi_size_t _i;			\
+		for (_i = 0; _i < (n); _i++)	\
+			(d)[_i] = (s)[_i];	\
+	} while (0)
+
+#define MPN_COPY_INCR(d, s, n) /* copy n limbs s -> d, ascending index */ \
+	do {					\
+		mpi_size_t _i;			\
+		for (_i = 0; _i < (n); _i++)	\
+			(d)[_i] = (s)[_i];	\
+	} while (0)
+
+#define MPN_COPY_DECR(d, s, n) \
+	do {					\
+		mpi_size_t _i;			\
+		for (_i = (n)-1; _i >= 0; _i--) \
+			(d)[_i] = (s)[_i];	\
+	} while (0)
+
+/* Zero N limbs at D */
+#define MPN_ZERO(d, n) \
+	do {					\
+		int  _i;			\
+		for (_i = 0; _i < (n); _i++)	\
+			(d)[_i] = 0;		\
+	} while (0)
+
+#define MPN_NORMALIZE(d, n)  \
+	do {					\
+		while ((n) > 0) {		\
+			if ((d)[(n)-1])		\
+				break;		\
+			(n)--;			\
+		}				\
+	} while (0)
+
+#define MPN_NORMALIZE_NOT_ZERO(d, n) \
+	do {				\
+		for (;;) {		\
+			if ((d)[(n)-1])	\
+				break;	\
+			(n)--;		\
+		}			\
+	} while (0)
+
+#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
+	do {							\
+		if ((size) < KARATSUBA_THRESHOLD)		\
+			mul_n_basecase(prodp, up, vp, size);	\
+		else						\
+			mul_n(prodp, up, vp, size, tspace);	\
+	} while (0);
+
+/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
+ * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
+ * If this would yield overflow, DI should be the largest possible number
+ * (i.e., only ones).  For correct operation, the most significant bit of D
+ * has to be set.  Put the quotient in Q and the remainder in R.
+ */
+#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \
+	do {								\
+		mpi_limb_t _q, _ql, _r;					\
+		mpi_limb_t _xh, _xl;					\
+		umul_ppmm(_q, _ql, (nh), (di));				\
+		_q += (nh);	/* DI is 2**BITS_PER_MPI_LIMB too small */ \
+		umul_ppmm(_xh, _xl, _q, (d));				\
+		sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl);		\
+		if (_xh) {						\
+			sub_ddmmss(_xh, _r, _xh, _r, 0, (d));		\
+			_q++;						\
+			if (_xh) {					\
+				sub_ddmmss(_xh, _r, _xh, _r, 0, (d));	\
+				_q++;					\
+			}						\
+		}							\
+		if (_r >= (d)) {					\
+			_r -= (d);					\
+			_q++;						\
+		}							\
+		(r) = _r;						\
+		(q) = _q;						\
+	} while (0)
+
+/*-- mpiutil.c --*/
+mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs);
+void mpi_free_limb_space(mpi_ptr_t a);
+void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs);
+
+/*-- mpi-bit.c --*/
+void mpi_rshift_limbs(MPI a, unsigned int count);
+int mpi_lshift_limbs(MPI a, unsigned int count);
+
+/*-- mpihelp-add.c --*/
+mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_ptr_t s2_ptr, mpi_size_t size);
+mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+		       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
+
+/*-- mpihelp-sub.c --*/
+mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_ptr_t s2_ptr, mpi_size_t size);
+mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+		       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
+
+/*-- mpihelp-cmp.c --*/
+int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size);
+
+/*-- mpihelp-mul.c --*/
+
+struct karatsuba_ctx {
+	struct karatsuba_ctx *next;
+	mpi_ptr_t tspace;
+	mpi_size_t tspace_size;
+	mpi_ptr_t tp;
+	mpi_size_t tp_size;
+};
+
+void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx);
+
+mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			    mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			    mpi_size_t s1_size, mpi_limb_t s2_limb);
+int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size);
+int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+		mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result);
+void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size);
+void mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size,
+		mpi_ptr_t tspace);
+
+int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
+			       mpi_ptr_t up, mpi_size_t usize,
+			       mpi_ptr_t vp, mpi_size_t vsize,
+			       struct karatsuba_ctx *ctx);
+
+/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/
+mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+			 mpi_size_t s1_size, mpi_limb_t s2_limb);
+
+/*-- mpihelp-div.c --*/
+mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+			 mpi_limb_t divisor_limb);
+mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
+			  mpi_ptr_t np, mpi_size_t nsize,
+			  mpi_ptr_t dp, mpi_size_t dsize);
+mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr,
+			    mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+			    mpi_limb_t divisor_limb);
+
+/*-- mpihelp-shift.c --*/
+mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+			  unsigned cnt);
+mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+			  unsigned cnt);
+
+/* Define stuff for longlong.h.  */
+#define W_TYPE_SIZE BITS_PER_MPI_LIMB
+typedef mpi_limb_t UWtype;
+typedef unsigned int UHWtype;
+#if defined(__GNUC__)
+typedef unsigned int UQItype __attribute__ ((mode(QI)));
+typedef int SItype __attribute__ ((mode(SI)));
+typedef unsigned int USItype __attribute__ ((mode(SI)));
+typedef int DItype __attribute__ ((mode(DI)));
+typedef unsigned int UDItype __attribute__ ((mode(DI)));
+#else
+typedef unsigned char UQItype;
+typedef long SItype;
+typedef unsigned long USItype;
+#endif
+
+#ifdef __GNUC__
+#include "mpi-inline.h"
+#endif
+
+#endif /*G10_MPI_INTERNAL_H */
-- 
cgit v1.2.3


From 051dbb918c7fb7da8e64a2cd0d804ba73399709f Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Date: Fri, 14 Oct 2011 15:25:16 +0300
Subject: crypto: digital signature verification support

This patch implements RSA digital signature verification using GnuPG library.

The format of the signature and the public key is defined by their respective
headers. The signature header contains version information, algorithm,
and keyid, which was used to generate the signature.
The key header contains version and algorithm type.
The payload of the signature and the key are multi-precision integers.

The signing and key management utilities evm-utils provide functionality
to generate signatures and load keys into the kernel keyring.
When the key is added to the kernel keyring, the keyid defines the name
of the key.

Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
---
 Documentation/digsig.txt |  96 ++++++++++++++++
 include/linux/digsig.h   |  64 +++++++++++
 lib/Kconfig              |   8 ++
 lib/Makefile             |   1 +
 lib/digsig.c             | 284 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 453 insertions(+)
 create mode 100644 Documentation/digsig.txt
 create mode 100644 include/linux/digsig.h
 create mode 100644 lib/digsig.c

(limited to 'include/linux')

diff --git a/Documentation/digsig.txt b/Documentation/digsig.txt
new file mode 100644
index 000000000000..3f682889068b
--- /dev/null
+++ b/Documentation/digsig.txt
@@ -0,0 +1,96 @@
+Digital Signature Verification API
+
+CONTENTS
+
+1. Introduction
+2. API
+3. User-space utilities
+
+
+1. Introduction
+
+Digital signature verification API provides a method to verify digital signature.
+Currently digital signatures are used by the IMA/EVM integrity protection subsystem.
+
+Digital signature verification is implemented using a cut-down kernel port of
+the GnuPG multi-precision integers (MPI) library. The kernel port handles
+memory allocation errors, has been refactored according to kernel coding
+style, and has had the errors and warnings reported by checkpatch.pl fixed.
+
+Public key and signature consist of header and MPIs.
+
+struct pubkey_hdr {
+	uint8_t		version;	/* key format version */
+	time_t		timestamp;	/* key made, always 0 for now */
+	uint8_t		algo;
+	uint8_t		nmpi;
+	char		mpi[0];
+} __packed;
+
+struct signature_hdr {
+	uint8_t		version;	/* signature format version */
+	time_t		timestamp;	/* signature made */
+	uint8_t		algo;
+	uint8_t		hash;
+	uint8_t		keyid[8];
+	uint8_t		nmpi;
+	char		mpi[0];
+} __packed;
+
+keyid equals SHA1[12-19] over the total key content.
+The signature header is used as an input to generate a signature.
+This approach ensures that the key or signature header cannot be changed.
+It protects the timestamp from being changed and can be used for rollback
+protection.
+
+2. API
+
+API currently includes only 1 function:
+
+	digsig_verify() - digital signature verification with public key
+
+
+/**
+ * digsig_verify() - digital signature verification with public key
+ * @keyring:	keyring to search key in
+ * @sig:	digital signature
+ * @siglen:	length of the signature
+ * @data:	data
+ * @datalen:	length of the data
+ * @return:	0 on success, -EINVAL otherwise
+ *
+ * Verifies data integrity against digital signature.
+ * Currently only RSA is supported.
+ * Normally hash of the content is used as a data for this function.
+ *
+ */
+int digsig_verify(struct key *keyring, const char *sig, int siglen,
+						const char *data, int datalen);
+
+3. User-space utilities
+
+The signing and key management utilities evm-utils provide functionality
+to generate signatures, to load keys into the kernel keyring.
+Keys can be in PEM or converted to the kernel format.
+When the key is added to the kernel keyring, the keyid defines the name
+of the key: 5D2B05FC633EE3E8 in the example below.
+
+Here is example output of the keyctl utility.
+
+$ keyctl show
+Session Keyring
+       -3 --alswrv      0     0  keyring: _ses
+603976250 --alswrv      0    -1   \_ keyring: _uid.0
+817777377 --alswrv      0     0       \_ user: kmk
+891974900 --alswrv      0     0       \_ encrypted: evm-key
+170323636 --alswrv      0     0       \_ keyring: _module
+548221616 --alswrv      0     0       \_ keyring: _ima
+128198054 --alswrv      0     0       \_ keyring: _evm
+
+$ keyctl list 128198054
+1 key in keyring:
+620789745: --alswrv     0     0 user: 5D2B05FC633EE3E8
+
+
+Dmitry Kasatkin
+06.10.2011
diff --git a/include/linux/digsig.h b/include/linux/digsig.h
new file mode 100644
index 000000000000..efae755017d7
--- /dev/null
+++ b/include/linux/digsig.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2011 Nokia Corporation
+ * Copyright (C) 2011 Intel Corporation
+ *
+ * Author:
+ * Dmitry Kasatkin <dmitry.kasatkin@nokia.com>
+ *                 <dmitry.kasatkin@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ *
+ */
+
+#ifndef _DIGSIG_H
+#define _DIGSIG_H
+
+#include <linux/key.h>
+
+enum pubkey_algo {
+	PUBKEY_ALGO_RSA,
+	PUBKEY_ALGO_MAX,
+};
+
+enum digest_algo {
+	DIGEST_ALGO_SHA1,
+	DIGEST_ALGO_SHA256,
+	DIGEST_ALGO_MAX
+};
+
+struct pubkey_hdr {
+	uint8_t		version;	/* key format version */
+	time_t		timestamp;	/* key made, always 0 for now */
+	uint8_t		algo;
+	uint8_t		nmpi;
+	char		mpi[0];
+} __packed;
+
+struct signature_hdr {
+	uint8_t		version;	/* signature format version */
+	time_t		timestamp;	/* signature made */
+	uint8_t		algo;
+	uint8_t		hash;
+	uint8_t		keyid[8];
+	uint8_t		nmpi;
+	char		mpi[0];
+} __packed;
+
+#if defined(CONFIG_DIGSIG) || defined(CONFIG_DIGSIG_MODULE)
+
+int digsig_verify(struct key *keyring, const char *sig, int siglen,
+					const char *digest, int digestlen);
+
+#else
+
+static inline int digsig_verify(struct key *keyring, const char *sig,
+				int siglen, const char *digest, int digestlen)
+{
+	return -EOPNOTSUPP;	/* stub for builds without CONFIG_DIGSIG */
+}
+
+#endif /* CONFIG_DIGSIG */
+
+#endif /* _DIGSIG_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index d411de53be4f..c1a89185fe6b 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -293,4 +293,12 @@ config MPILIB_EXTRA
 	  This code in unnecessary for RSA digital signature verification,
 	  and can be compiled if needed.
 
+config DIGSIG
+	tristate "In-kernel signature checker"
+	depends on CRYPTO
+	select MPILIB
+	help
+	  Digital signature verification. Currently only RSA is supported.
+	  Implementation is done using GnuPG MPI library
+
 endmenu
diff --git a/lib/Makefile b/lib/Makefile
index 6ba8cbf4c72f..0f5cff267aff 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -116,6 +116,7 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
 obj-$(CONFIG_CORDIC) += cordic.o
 
 obj-$(CONFIG_MPILIB) += mpi/
+obj-$(CONFIG_DIGSIG) += digsig.o
 
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
diff --git a/lib/digsig.c b/lib/digsig.c
new file mode 100644
index 000000000000..fd2402f67f89
--- /dev/null
+++ b/lib/digsig.c
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) 2011 Nokia Corporation
+ * Copyright (C) 2011 Intel Corporation
+ *
+ * Author:
+ * Dmitry Kasatkin <dmitry.kasatkin@nokia.com>
+ *                 <dmitry.kasatkin@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ *
+ * File: digsig.c
+ *	implements signature (RSA) verification
+ *	pkcs decoding is based on LibTomCrypt code
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/key.h>
+#include <linux/crypto.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <keys/user-type.h>
+#include <linux/mpi.h>
+#include <linux/digsig.h>
+
+static struct crypto_shash *shash;
+
+static int pkcs_1_v1_5_decode_emsa(const unsigned char *msg,
+			unsigned long  msglen,
+			unsigned long  modulus_bitlen,
+			unsigned char *out,
+			unsigned long *outlen,
+			int *is_valid)
+{
+	unsigned long modulus_len, ps_len, i;
+	int result;
+
+	/* default to invalid packet */
+	*is_valid = 0;
+
+	modulus_len = (modulus_bitlen >> 3) + (modulus_bitlen & 7 ? 1 : 0);
+
+	/* test message size */
+	if ((msglen > modulus_len) || (modulus_len < 11))
+		return -EINVAL;
+
+	/* separate encoded message */
+	if ((msg[0] != 0x00) || (msg[1] != (unsigned char)1)) {
+		result = -EINVAL;
+		goto bail;
+	}
+
+	for (i = 2; i < modulus_len - 1; i++)
+		if (msg[i] != 0xFF)
+			break;
+
+	/* separator check */
+	if (msg[i] != 0) {
+		/* There was no octet with hexadecimal value 0x00
+		to separate ps from m. */
+		result = -EINVAL;
+		goto bail;
+	}
+
+	ps_len = i - 2;
+
+	if (*outlen < (msglen - (2 + ps_len + 1))) {
+		*outlen = msglen - (2 + ps_len + 1);
+		result = -EOVERFLOW;
+		goto bail;
+	}
+
+	*outlen = (msglen - (2 + ps_len + 1));
+	memcpy(out, &msg[2 + ps_len + 1], *outlen);
+
+	/* valid packet */
+	*is_valid = 1;
+	result    = 0;
+bail:
+	return result;
+}
+
+/*
+ * RSA Signature verification with public key
+ */
+static int digsig_verify_rsa(struct key *key,
+		    const char *sig, int siglen,
+		       const char *h, int hlen)
+{
+	int err = -EINVAL;
+	unsigned long len;
+	unsigned long mlen, mblen;
+	unsigned nret, l;
+	int valid, head, i;
+	unsigned char *out1 = NULL, *out2 = NULL;
+	MPI in = NULL, res = NULL, pkey[2];
+	uint8_t *p, *datap, *endp;
+	struct user_key_payload *ukp;
+	struct pubkey_hdr *pkh;
+
+	down_read(&key->sem);
+	ukp = key->payload.data;
+	pkh = (struct pubkey_hdr *)ukp->data;
+
+	if (pkh->version != 1)
+		goto err1;
+
+	if (pkh->algo != PUBKEY_ALGO_RSA)
+		goto err1;
+
+	if (pkh->nmpi != 2)
+		goto err1;
+
+	datap = pkh->mpi;
+	endp = datap + ukp->datalen;
+
+	for (i = 0; i < pkh->nmpi; i++) {
+		unsigned int remaining = endp - datap;
+		pkey[i] = mpi_read_from_buffer(datap, &remaining);
+		datap += remaining;
+	}
+
+	mblen = mpi_get_nbits(pkey[0]);
+	mlen = (mblen + 7)/8;
+
+	err = -ENOMEM;
+
+	out1 = kzalloc(mlen, GFP_KERNEL);
+	if (!out1)
+		goto err;
+
+	out2 = kzalloc(mlen, GFP_KERNEL);
+	if (!out2)
+		goto err;
+
+	nret = siglen;
+	in = mpi_read_from_buffer(sig, &nret);
+	if (!in)
+		goto err;
+
+	res = mpi_alloc(mpi_get_nlimbs(in) * 2);
+	if (!res)
+		goto err;
+
+	err = mpi_powm(res, in, pkey[1], pkey[0]);
+	if (err)
+		goto err;
+
+	if (mpi_get_nlimbs(res) * BYTES_PER_MPI_LIMB > mlen) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	p = mpi_get_buffer(res, &l, NULL);
+	if (!p) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	len = mlen;
+	head = len - l;
+	memset(out1, 0, head);
+	memcpy(out1 + head, p, l);
+
+	err = -EINVAL;
+	pkcs_1_v1_5_decode_emsa(out1, len, mblen, out2, &len, &valid);
+
+	if (valid && len == hlen)
+		err = memcmp(out2, h, hlen);
+
+err:
+	mpi_free(in);
+	mpi_free(res);
+	kfree(out1);
+	kfree(out2);
+	mpi_free(pkey[0]);
+	mpi_free(pkey[1]);
+err1:
+	up_read(&key->sem);
+
+	return err;
+}
+
+/**
+ * digsig_verify() - digital signature verification with public key
+ * @keyring:	keyring to search key in
+ * @sig:	digital signature
+ * @siglen:	length of the signature
+ * @data:	data
+ * @datalen:	length of the data
+ * @return:	0 on success, -EINVAL otherwise
+ *
+ * Verifies data integrity against digital signature.
+ * Currently only RSA is supported.
+ * Normally hash of the content is used as a data for this function.
+ *
+ */
+int digsig_verify(struct key *keyring, const char *sig, int siglen,
+						const char *data, int datalen)
+{
+	int err = -ENOMEM;
+	struct signature_hdr *sh = (struct signature_hdr *)sig;
+	struct shash_desc *desc = NULL;
+	unsigned char hash[SHA1_DIGEST_SIZE];
+	struct key *key;
+	char name[20];
+
+	if (siglen < sizeof(*sh) + 2)
+		return -EINVAL;
+
+	if (sh->algo != PUBKEY_ALGO_RSA)
+		return -ENOTSUPP;
+
+	sprintf(name, "%llX", __be64_to_cpup((uint64_t *)sh->keyid));
+
+	if (keyring) {
+		/* search in specific keyring */
+		key_ref_t kref;
+		kref = keyring_search(make_key_ref(keyring, 1UL),
+						&key_type_user, name);
+		if (IS_ERR(kref))
+			key = ERR_PTR(PTR_ERR(kref));
+		else
+			key = key_ref_to_ptr(kref);
+	} else {
+		key = request_key(&key_type_user, name, NULL);
+	}
+	if (IS_ERR(key)) {
+		pr_err("key not found, id: %s\n", name);
+		return PTR_ERR(key);
+	}
+
+	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(shash),
+		       GFP_KERNEL);
+	if (!desc)
+		goto err;
+
+	desc->tfm = shash;
+	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	crypto_shash_init(desc);
+	crypto_shash_update(desc, data, datalen);
+	crypto_shash_update(desc, sig, sizeof(*sh));
+	crypto_shash_final(desc, hash);
+
+	kfree(desc);
+
+	/* pass signature mpis address */
+	err = digsig_verify_rsa(key, sig + sizeof(*sh), siglen - sizeof(*sh),
+			     hash, sizeof(hash));
+
+err:
+	key_put(key);
+
+	return err ? -EINVAL : 0;
+}
+EXPORT_SYMBOL_GPL(digsig_verify);
+
+static int __init digsig_init(void)
+{
+	shash = crypto_alloc_shash("sha1", 0, 0);
+	if (IS_ERR(shash)) {
+		pr_err("shash allocation failed\n");
+		return  PTR_ERR(shash);
+	}
+
+	return 0;
+
+}
+
+static void __exit digsig_cleanup(void)
+{
+	crypto_free_shash(shash);
+}
+
+module_init(digsig_init);
+module_exit(digsig_cleanup);
+
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 5009065d38c95455bd2d27c2838313e3dd0c5bc7 Mon Sep 17 00:00:00 2001
From: Ohad Ben-Cohen <ohad@wizery.com>
Date: Thu, 10 Nov 2011 11:32:25 +0200
Subject: iommu/core: stop converting bytes to page order back and forth

Express sizes in bytes rather than in page order, to eliminate the
size->order->size conversions we have whenever the IOMMU API is calling
the low level drivers' map/unmap methods.

Adapt all existing drivers.

Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Cc: David Brown <davidb@codeaurora.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <Joerg.Roedel@amd.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Cc: KyongHo Cho <pullip.cho@samsung.com>
Cc: Hiroshi DOYU <hdoyu@nvidia.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu.c   | 13 +++++--------
 drivers/iommu/intel-iommu.c | 11 ++++-------
 drivers/iommu/iommu.c       |  8 +++++---
 drivers/iommu/msm_iommu.c   | 19 +++++++------------
 drivers/iommu/omap-iommu.c  | 14 +++++---------
 include/linux/iommu.h       |  6 +++---
 6 files changed, 29 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 4ee277a8521a..a3b7072e86e2 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2702,9 +2702,8 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 }
 
 static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
-			 phys_addr_t paddr, int gfp_order, int iommu_prot)
+			 phys_addr_t paddr, size_t page_size, int iommu_prot)
 {
-	unsigned long page_size = 0x1000UL << gfp_order;
 	struct protection_domain *domain = dom->priv;
 	int prot = 0;
 	int ret;
@@ -2721,13 +2720,11 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
 	return ret;
 }
 
-static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
-			   int gfp_order)
+static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
+			   size_t page_size)
 {
 	struct protection_domain *domain = dom->priv;
-	unsigned long page_size, unmap_size;
-
-	page_size  = 0x1000UL << gfp_order;
+	size_t unmap_size;
 
 	mutex_lock(&domain->api_lock);
 	unmap_size = iommu_unmap_page(domain, iova, page_size);
@@ -2735,7 +2732,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
 
 	domain_flush_tlb_pde(domain);
 
-	return get_order(unmap_size);
+	return unmap_size;
 }
 
 static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index c0c7820d4c46..2a165010a1c1 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3979,12 +3979,11 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
 
 static int intel_iommu_map(struct iommu_domain *domain,
 			   unsigned long iova, phys_addr_t hpa,
-			   int gfp_order, int iommu_prot)
+			   size_t size, int iommu_prot)
 {
 	struct dmar_domain *dmar_domain = domain->priv;
 	u64 max_addr;
 	int prot = 0;
-	size_t size;
 	int ret;
 
 	if (iommu_prot & IOMMU_READ)
@@ -3994,7 +3993,6 @@ static int intel_iommu_map(struct iommu_domain *domain,
 	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
 		prot |= DMA_PTE_SNP;
 
-	size     = PAGE_SIZE << gfp_order;
 	max_addr = iova + size;
 	if (dmar_domain->max_addr < max_addr) {
 		u64 end;
@@ -4017,11 +4015,10 @@ static int intel_iommu_map(struct iommu_domain *domain,
 	return ret;
 }
 
-static int intel_iommu_unmap(struct iommu_domain *domain,
-			     unsigned long iova, int gfp_order)
+static size_t intel_iommu_unmap(struct iommu_domain *domain,
+			     unsigned long iova, size_t size)
 {
 	struct dmar_domain *dmar_domain = domain->priv;
-	size_t size = PAGE_SIZE << gfp_order;
 	int order;
 
 	order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
@@ -4030,7 +4027,7 @@ static int intel_iommu_unmap(struct iommu_domain *domain,
 	if (dmar_domain->max_addr == iova + size)
 		dmar_domain->max_addr = iova;
 
-	return order;
+	return PAGE_SIZE << order;
 }
 
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 2fb2963df553..7a2953d8f12e 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -168,13 +168,13 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
 
 	BUG_ON(!IS_ALIGNED(iova | paddr, size));
 
-	return domain->ops->map(domain, iova, paddr, gfp_order, prot);
+	return domain->ops->map(domain, iova, paddr, size, prot);
 }
 EXPORT_SYMBOL_GPL(iommu_map);
 
 int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
 {
-	size_t size;
+	size_t size, unmapped;
 
 	if (unlikely(domain->ops->unmap == NULL))
 		return -ENODEV;
@@ -183,6 +183,8 @@ int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
 
 	BUG_ON(!IS_ALIGNED(iova, size));
 
-	return domain->ops->unmap(domain, iova, gfp_order);
+	unmapped = domain->ops->unmap(domain, iova, size);
+
+	return get_order(unmapped);
 }
 EXPORT_SYMBOL_GPL(iommu_unmap);
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 5865dd2e28f9..13718d958da8 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -352,7 +352,7 @@ fail:
 }
 
 static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
-			 phys_addr_t pa, int order, int prot)
+			 phys_addr_t pa, size_t len, int prot)
 {
 	struct msm_priv *priv;
 	unsigned long flags;
@@ -363,7 +363,6 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
 	unsigned long *sl_pte;
 	unsigned long sl_offset;
 	unsigned int pgprot;
-	size_t len = 0x1000UL << order;
 	int ret = 0, tex, sh;
 
 	spin_lock_irqsave(&msm_iommu_lock, flags);
@@ -463,8 +462,8 @@ fail:
 	return ret;
 }
 
-static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
-			    int order)
+static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
+			    size_t len)
 {
 	struct msm_priv *priv;
 	unsigned long flags;
@@ -474,7 +473,6 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
 	unsigned long *sl_table;
 	unsigned long *sl_pte;
 	unsigned long sl_offset;
-	size_t len = 0x1000UL << order;
 	int i, ret = 0;
 
 	spin_lock_irqsave(&msm_iommu_lock, flags);
@@ -544,15 +542,12 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
 
 	ret = __flush_iotlb(domain);
 
-	/*
-	 * the IOMMU API requires us to return the order of the unmapped
-	 * page (on success).
-	 */
-	if (!ret)
-		ret = order;
 fail:
 	spin_unlock_irqrestore(&msm_iommu_lock, flags);
-	return ret;
+
+	/* the IOMMU API requires us to return how many bytes were unmapped */
+	len = ret ? 0 : len;
+	return len;
 }
 
 static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 8f32b2bf7587..ad80b1d0d099 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1019,12 +1019,11 @@ static void iopte_cachep_ctor(void *iopte)
 }
 
 static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
-			 phys_addr_t pa, int order, int prot)
+			 phys_addr_t pa, size_t bytes, int prot)
 {
 	struct omap_iommu_domain *omap_domain = domain->priv;
 	struct omap_iommu *oiommu = omap_domain->iommu_dev;
 	struct device *dev = oiommu->dev;
-	size_t bytes = PAGE_SIZE << order;
 	struct iotlb_entry e;
 	int omap_pgsz;
 	u32 ret, flags;
@@ -1049,19 +1048,16 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
 	return ret;
 }
 
-static int omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
-			    int order)
+static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
+			    size_t size)
 {
 	struct omap_iommu_domain *omap_domain = domain->priv;
 	struct omap_iommu *oiommu = omap_domain->iommu_dev;
 	struct device *dev = oiommu->dev;
-	size_t unmap_size;
 
-	dev_dbg(dev, "unmapping da 0x%lx order %d\n", da, order);
+	dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size);
 
-	unmap_size = iopgtable_clear_entry(oiommu, da);
-
-	return unmap_size ? get_order(unmap_size) : -EINVAL;
+	return iopgtable_clear_entry(oiommu, da);
 }
 
 static int
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 432acc4c054d..d5ebf3f4dd53 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -54,9 +54,9 @@ struct iommu_ops {
 	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
 	void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
 	int (*map)(struct iommu_domain *domain, unsigned long iova,
-		   phys_addr_t paddr, int gfp_order, int prot);
-	int (*unmap)(struct iommu_domain *domain, unsigned long iova,
-		     int gfp_order);
+		   phys_addr_t paddr, size_t size, int prot);
+	size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
+		     size_t size);
 	phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
 				    unsigned long iova);
 	int (*domain_has_cap)(struct iommu_domain *domain,
-- 
cgit v1.2.3


From 7d3002cc8c160dbda0e6ab9cd66dc6eb401b8b70 Mon Sep 17 00:00:00 2001
From: Ohad Ben-Cohen <ohad@wizery.com>
Date: Thu, 10 Nov 2011 11:32:26 +0200
Subject: iommu/core: split mapping to page sizes as supported by the hardware

When mapping a memory region, split it to page sizes as supported
by the iommu hardware. Always prefer bigger pages, when possible,
in order to reduce the TLB pressure.

The logic to do that is now added to the IOMMU core, so neither the iommu
drivers themselves nor users of the IOMMU API have to duplicate it.

This allows a more lenient granularity of mappings; traditionally the
IOMMU API took 'order' (of a page) as a mapping size, and directly let
the low level iommu drivers handle the mapping, but now that the IOMMU
core can split arbitrary memory regions into pages, we can remove this
limitation, so users don't have to split those regions by themselves.

Currently the supported page sizes are advertised once and they then
remain static. That works well for OMAP and MSM but it would probably
not fly well with intel's hardware, where the page size capabilities
seem to have the potential to be different between several DMA
remapping devices.

bus_set_iommu() currently sets a default pgsize behavior, so we can convert
the IOMMU drivers in subsequent patches. After all the drivers
are converted, the temporary default settings will be removed.

Mainline users of the IOMMU API (kvm and omap-iovmm) are adapted
to deal with bytes instead of page order.

Many thanks to Joerg Roedel <Joerg.Roedel@amd.com> for significant review!

Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
Cc: David Brown <davidb@codeaurora.org>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <Joerg.Roedel@amd.com>
Cc: Stepan Moskovchenko <stepanm@codeaurora.org>
Cc: KyongHo Cho <pullip.cho@samsung.com>
Cc: Hiroshi DOYU <hdoyu@nvidia.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: kvm@vger.kernel.org
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/iommu.c      | 131 ++++++++++++++++++++++++++++++++++++++++-----
 drivers/iommu/omap-iovmm.c |  17 +++---
 include/linux/iommu.h      |  20 +++++--
 virt/kvm/iommu.c           |   8 +--
 4 files changed, 144 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 7a2953d8f12e..b278458d5816 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -16,6 +16,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
+#define pr_fmt(fmt)    "%s: " fmt, __func__
+
 #include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/bug.h>
@@ -47,6 +49,16 @@ int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops)
 	if (bus->iommu_ops != NULL)
 		return -EBUSY;
 
+	/*
+	 * Set the default pgsize values, which retain the existing
+	 * IOMMU API behavior: drivers will be called to map
+	 * regions that are sized/aligned to order of 4KiB pages.
+	 *
+	 * This will be removed once all drivers are migrated.
+	 */
+	if (!ops->pgsize_bitmap)
+		ops->pgsize_bitmap = ~0xFFFUL;
+
 	bus->iommu_ops = ops;
 
 	/* Do IOMMU specific setup for this bus-type */
@@ -157,34 +169,125 @@ int iommu_domain_has_cap(struct iommu_domain *domain,
 EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
 
 int iommu_map(struct iommu_domain *domain, unsigned long iova,
-	      phys_addr_t paddr, int gfp_order, int prot)
+	      phys_addr_t paddr, size_t size, int prot)
 {
-	size_t size;
+	unsigned long orig_iova = iova;
+	unsigned int min_pagesz;
+	size_t orig_size = size;
+	int ret = 0;
 
 	if (unlikely(domain->ops->map == NULL))
 		return -ENODEV;
 
-	size         = PAGE_SIZE << gfp_order;
+	/* find out the minimum page size supported */
+	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+
+	/*
+	 * both the virtual address and the physical one, as well as
+	 * the size of the mapping, must be aligned (at least) to the
+	 * size of the smallest page supported by the hardware
+	 */
+	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+		pr_err("unaligned: iova 0x%lx pa 0x%lx size 0x%lx min_pagesz "
+			"0x%x\n", iova, (unsigned long)paddr,
+			(unsigned long)size, min_pagesz);
+		return -EINVAL;
+	}
+
+	pr_debug("map: iova 0x%lx pa 0x%lx size 0x%lx\n", iova,
+				(unsigned long)paddr, (unsigned long)size);
+
+	while (size) {
+		unsigned long pgsize, addr_merge = iova | paddr;
+		unsigned int pgsize_idx;
+
+		/* Max page size that still fits into 'size' */
+		pgsize_idx = __fls(size);
+
+		/* need to consider alignment requirements ? */
+		if (likely(addr_merge)) {
+			/* Max page size allowed by both iova and paddr */
+			unsigned int align_pgsize_idx = __ffs(addr_merge);
+
+			pgsize_idx = min(pgsize_idx, align_pgsize_idx);
+		}
+
+		/* build a mask of acceptable page sizes */
+		pgsize = (1UL << (pgsize_idx + 1)) - 1;
+
+		/* throw away page sizes not supported by the hardware */
+		pgsize &= domain->ops->pgsize_bitmap;
 
-	BUG_ON(!IS_ALIGNED(iova | paddr, size));
+		/* make sure we're still sane */
+		BUG_ON(!pgsize);
 
-	return domain->ops->map(domain, iova, paddr, size, prot);
+		/* pick the biggest page */
+		pgsize_idx = __fls(pgsize);
+		pgsize = 1UL << pgsize_idx;
+
+		pr_debug("mapping: iova 0x%lx pa 0x%lx pgsize %lu\n", iova,
+					(unsigned long)paddr, pgsize);
+
+		ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
+		if (ret)
+			break;
+
+		iova += pgsize;
+		paddr += pgsize;
+		size -= pgsize;
+	}
+
+	/* unroll mapping in case something went wrong */
+	if (ret)
+		iommu_unmap(domain, orig_iova, orig_size - size);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_map);
 
-int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
+size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
 {
-	size_t size, unmapped;
+	size_t unmapped_page, unmapped = 0;
+	unsigned int min_pagesz;
 
 	if (unlikely(domain->ops->unmap == NULL))
 		return -ENODEV;
 
-	size         = PAGE_SIZE << gfp_order;
-
-	BUG_ON(!IS_ALIGNED(iova, size));
-
-	unmapped = domain->ops->unmap(domain, iova, size);
-
-	return get_order(unmapped);
+	/* find out the minimum page size supported */
+	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+
+	/*
+	 * The virtual address, as well as the size of the mapping, must be
+	 * aligned (at least) to the size of the smallest page supported
+	 * by the hardware
+	 */
+	if (!IS_ALIGNED(iova | size, min_pagesz)) {
+		pr_err("unaligned: iova 0x%lx size 0x%lx min_pagesz 0x%x\n",
+					iova, (unsigned long)size, min_pagesz);
+		return -EINVAL;
+	}
+
+	pr_debug("unmap this: iova 0x%lx size 0x%lx\n", iova,
+							(unsigned long)size);
+
+	/*
+	 * Keep iterating until we either unmap 'size' bytes (or more)
+	 * or we hit an area that isn't mapped.
+	 */
+	while (unmapped < size) {
+		size_t left = size - unmapped;
+
+		unmapped_page = domain->ops->unmap(domain, iova, left);
+		if (!unmapped_page)
+			break;
+
+		pr_debug("unmapped: iova 0x%lx size %lx\n", iova,
+					(unsigned long)unmapped_page);
+
+		iova += unmapped_page;
+		unmapped += unmapped_page;
+	}
+
+	return unmapped;
 }
 EXPORT_SYMBOL_GPL(iommu_unmap);
diff --git a/drivers/iommu/omap-iovmm.c b/drivers/iommu/omap-iovmm.c
index e8fdb8830f69..0b7b14cb030b 100644
--- a/drivers/iommu/omap-iovmm.c
+++ b/drivers/iommu/omap-iovmm.c
@@ -409,7 +409,6 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
 	unsigned int i, j;
 	struct scatterlist *sg;
 	u32 da = new->da_start;
-	int order;
 
 	if (!domain || !sgt)
 		return -EINVAL;
@@ -428,12 +427,10 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
 		if (bytes_to_iopgsz(bytes) < 0)
 			goto err_out;
 
-		order = get_order(bytes);
-
 		pr_debug("%s: [%d] %08x %08x(%x)\n", __func__,
 			 i, da, pa, bytes);
 
-		err = iommu_map(domain, da, pa, order, flags);
+		err = iommu_map(domain, da, pa, bytes, flags);
 		if (err)
 			goto err_out;
 
@@ -448,10 +445,9 @@ err_out:
 		size_t bytes;
 
 		bytes = sg->length + sg->offset;
-		order = get_order(bytes);
 
 		/* ignore failures.. we're already handling one */
-		iommu_unmap(domain, da, order);
+		iommu_unmap(domain, da, bytes);
 
 		da += bytes;
 	}
@@ -466,7 +462,8 @@ static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
 	size_t total = area->da_end - area->da_start;
 	const struct sg_table *sgt = area->sgt;
 	struct scatterlist *sg;
-	int i, err;
+	int i;
+	size_t unmapped;
 
 	BUG_ON(!sgtable_ok(sgt));
 	BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE));
@@ -474,13 +471,11 @@ static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
 	start = area->da_start;
 	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
 		size_t bytes;
-		int order;
 
 		bytes = sg->length + sg->offset;
-		order = get_order(bytes);
 
-		err = iommu_unmap(domain, start, order);
-		if (err < 0)
+		unmapped = iommu_unmap(domain, start, bytes);
+		if (unmapped < bytes)
 			break;
 
 		dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n",
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index d5ebf3f4dd53..cc26f89c4ee6 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -48,6 +48,19 @@ struct iommu_domain {
 
 #ifdef CONFIG_IOMMU_API
 
+/**
+ * struct iommu_ops - iommu ops and capabilities
+ * @domain_init: init iommu domain
+ * @domain_destroy: destroy iommu domain
+ * @attach_dev: attach device to an iommu domain
+ * @detach_dev: detach device from an iommu domain
+ * @map: map a physically contiguous memory region to an iommu domain
+ * @unmap: unmap a physically contiguous memory region from an iommu domain
+ * @iova_to_phys: translate iova to physical address
+ * @domain_has_cap: domain capabilities query
+ * @commit: commit iommu domain
+ * @pgsize_bitmap: bitmap of supported page sizes
+ */
 struct iommu_ops {
 	int (*domain_init)(struct iommu_domain *domain);
 	void (*domain_destroy)(struct iommu_domain *domain);
@@ -61,6 +74,7 @@ struct iommu_ops {
 				    unsigned long iova);
 	int (*domain_has_cap)(struct iommu_domain *domain,
 			      unsigned long cap);
+	unsigned long pgsize_bitmap;
 };
 
 extern int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops);
@@ -72,9 +86,9 @@ extern int iommu_attach_device(struct iommu_domain *domain,
 extern void iommu_detach_device(struct iommu_domain *domain,
 				struct device *dev);
 extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
-		     phys_addr_t paddr, int gfp_order, int prot);
-extern int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-		       int gfp_order);
+		     phys_addr_t paddr, size_t size, int prot);
+extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+		       size_t size);
 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
 				      unsigned long iova);
 extern int iommu_domain_has_cap(struct iommu_domain *domain,
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index a195c07fa829..304d7e5717e9 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -113,7 +113,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 
 		/* Map into IO address space */
 		r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
-			      get_order(page_size), flags);
+			      page_size, flags);
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%llx\n", pfn);
@@ -292,15 +292,15 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 
 	while (gfn < end_gfn) {
 		unsigned long unmap_pages;
-		int order;
+		size_t size;
 
 		/* Get physical address */
 		phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
 		pfn  = phys >> PAGE_SHIFT;
 
 		/* Unmap address from IO address space */
-		order       = iommu_unmap(domain, gfn_to_gpa(gfn), 0);
-		unmap_pages = 1ULL << order;
+		size       = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
+		unmap_pages = 1ULL << get_order(size);
 
 		/* Unpin all pages we just unmapped to not leak any memory */
 		kvm_unpin_pages(kvm, pfn, unmap_pages);
-- 
cgit v1.2.3


From 39ce61a846c8e1fa00cb57ad5af021542e6e8403 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 11 Nov 2011 12:46:23 -0200
Subject: [media] dvb: Allow select between DVB-C Annex A and Annex C

DVB-C, as defined by ITU-T J.83, has 3 annexes. The difference between
Annex A and Annex C is that Annex C uses a subset of the modulation
types, and uses a different rolloff factor. A different rolloff means
that the bandwidth required is slightly different, and may affect the
SAW filter configuration at the tuners. Also, some demods have different
configurations, depending on using Annex A or Annex C.

So, allow userspace to specify it, by changing the rolloff factor.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/dvb/dvbproperty.xml | 4 ++++
 drivers/media/dvb/dvb-core/dvb_frontend.c       | 2 ++
 include/linux/dvb/frontend.h                    | 2 ++
 3 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml
index 3bc8a61efe30..6ac803959a47 100644
--- a/Documentation/DocBook/media/dvb/dvbproperty.xml
+++ b/Documentation/DocBook/media/dvb/dvbproperty.xml
@@ -311,6 +311,8 @@ typedef enum fe_rolloff {
 	ROLLOFF_20,
 	ROLLOFF_25,
 	ROLLOFF_AUTO,
+	ROLLOFF_15, /* DVB-C Annex A */
+	ROLLOFF_13, /* DVB-C Annex C */
 } fe_rolloff_t;
 		</programlisting>
 		</section>
@@ -778,8 +780,10 @@ typedef enum fe_hierarchy {
 			<listitem><para><link linkend="DTV-MODULATION"><constant>DTV_MODULATION</constant></link></para></listitem>
 			<listitem><para><link linkend="DTV-INVERSION"><constant>DTV_INVERSION</constant></link></para></listitem>
 			<listitem><para><link linkend="DTV-SYMBOL-RATE"><constant>DTV_SYMBOL_RATE</constant></link></para></listitem>
+			<listitem><para><link linkend="DTV-ROLLOFF"><constant>DTV_ROLLOFF</constant></link></para></listitem>
 			<listitem><para><link linkend="DTV-INNER-FEC"><constant>DTV_INNER_FEC</constant></link></para></listitem>
 		</itemizedlist>
+		<para>The Rolloff of 0.15 (ROLLOFF_15) is assumed, as ITU-T J.83 Annex A is more common. For Annex C, rolloff should be 0.13 (ROLLOFF_13). All other values are invalid.</para>
 	</section>
 	<section id="dvbc-annex-b-params">
 		<title>DVB-C Annex B delivery system</title>
diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c
index 2c0acdb4d811..c849455458ea 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.c
@@ -876,6 +876,7 @@ static int dvb_frontend_clear_cache(struct dvb_frontend *fe)
 	c->symbol_rate = QAM_AUTO;
 	c->code_rate_HP = FEC_AUTO;
 	c->code_rate_LP = FEC_AUTO;
+	c->rolloff = ROLLOFF_AUTO;
 
 	c->isdbt_partial_reception = -1;
 	c->isdbt_sb_mode = -1;
@@ -1030,6 +1031,7 @@ static void dtv_property_cache_init(struct dvb_frontend *fe,
 		break;
 	case FE_QAM:
 		c->delivery_system = SYS_DVBC_ANNEX_AC;
+		c->rolloff = ROLLOFF_15; /* implied for Annex A */
 		break;
 	case FE_OFDM:
 		c->delivery_system = SYS_DVBT;
diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index 1b1094c35e4f..d9251df867b5 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -329,6 +329,8 @@ typedef enum fe_rolloff {
 	ROLLOFF_20,
 	ROLLOFF_25,
 	ROLLOFF_AUTO,
+	ROLLOFF_15,	/* DVB-C Annex A */
+	ROLLOFF_13,	/* DVB-C Annex C */
 } fe_rolloff_t;
 
 typedef enum fe_delivery_system {
-- 
cgit v1.2.3


From 5a6b5e02d673486c96003d4a6e3e2510f4c59f92 Mon Sep 17 00:00:00 2001
From: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Date: Sat, 3 Sep 2011 01:36:08 +0000
Subject: fbdev: remove display subsystem

This four-year-old subsystem does not have a single in-tree user, not
even in staging, and as far as I know none out-of-tree either.
I think that justifies removing it, which cleans the config up.

Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Acked-by: James Simmons <jsimmons@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/video/Kconfig                 |   1 -
 drivers/video/Makefile                |   2 +-
 drivers/video/display/Kconfig         |  24 ----
 drivers/video/display/Makefile        |   6 -
 drivers/video/display/display-sysfs.c | 219 ----------------------------------
 include/linux/display.h               |  61 ----------
 6 files changed, 1 insertion(+), 312 deletions(-)
 delete mode 100644 drivers/video/display/Kconfig
 delete mode 100644 drivers/video/display/Makefile
 delete mode 100644 drivers/video/display/display-sysfs.c
 delete mode 100644 include/linux/display.h

(limited to 'include/linux')

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 8165c5577d71..1b5b98f29482 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -2413,7 +2413,6 @@ source "drivers/video/omap/Kconfig"
 source "drivers/video/omap2/Kconfig"
 
 source "drivers/video/backlight/Kconfig"
-source "drivers/video/display/Kconfig"
 
 if VT
 	source "drivers/video/console/Kconfig"
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 9b9d8fff7732..142606814d98 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -13,7 +13,7 @@ fb-objs                           := $(fb-y)
 
 obj-$(CONFIG_VT)		  += console/
 obj-$(CONFIG_LOGO)		  += logo/
-obj-y				  += backlight/ display/
+obj-y				  += backlight/
 
 obj-$(CONFIG_FB_CFB_FILLRECT)  += cfbfillrect.o
 obj-$(CONFIG_FB_CFB_COPYAREA)  += cfbcopyarea.o
diff --git a/drivers/video/display/Kconfig b/drivers/video/display/Kconfig
deleted file mode 100644
index f99af931d4f8..000000000000
--- a/drivers/video/display/Kconfig
+++ /dev/null
@@ -1,24 +0,0 @@
-#
-# Display drivers configuration
-#
-
-menu "Display device support"
-
-config DISPLAY_SUPPORT
-	tristate "Display panel/monitor support"
-	---help---
-	  This framework adds support for low-level control of a display.
-	  This includes support for power.
-
-	  Enable this to be able to choose the drivers for controlling the
-	  physical display panel/monitor on some platforms. This not only
-	  covers LCD displays for PDAs but also other types of displays
-	  such as CRT, TVout etc.
-
-	  To have support for your specific display panel you will have to
-	  select the proper drivers which depend on this option.
-
-comment "Display hardware drivers"
-	depends on DISPLAY_SUPPORT
-
-endmenu
diff --git a/drivers/video/display/Makefile b/drivers/video/display/Makefile
deleted file mode 100644
index c0ea832bf171..000000000000
--- a/drivers/video/display/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# Display drivers
-
-display-objs				:= display-sysfs.o
-
-obj-$(CONFIG_DISPLAY_SUPPORT)		+= display.o
-
diff --git a/drivers/video/display/display-sysfs.c b/drivers/video/display/display-sysfs.c
deleted file mode 100644
index 0c647d7af0ee..000000000000
--- a/drivers/video/display/display-sysfs.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- *  display-sysfs.c - Display output driver sysfs interface
- *
- *  Copyright (C) 2007 James Simmons <jsimmons@infradead.org>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or (at
- *  your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-#include <linux/module.h>
-#include <linux/display.h>
-#include <linux/ctype.h>
-#include <linux/idr.h>
-#include <linux/err.h>
-#include <linux/kdev_t.h>
-#include <linux/slab.h>
-
-static ssize_t display_show_name(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct display_device *dsp = dev_get_drvdata(dev);
-	return snprintf(buf, PAGE_SIZE, "%s\n", dsp->name);
-}
-
-static ssize_t display_show_type(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct display_device *dsp = dev_get_drvdata(dev);
-	return snprintf(buf, PAGE_SIZE, "%s\n", dsp->type);
-}
-
-static ssize_t display_show_contrast(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct display_device *dsp = dev_get_drvdata(dev);
-	ssize_t rc = -ENXIO;
-
-	mutex_lock(&dsp->lock);
-	if (likely(dsp->driver) && dsp->driver->get_contrast)
-		rc = sprintf(buf, "%d\n", dsp->driver->get_contrast(dsp));
-	mutex_unlock(&dsp->lock);
-	return rc;
-}
-
-static ssize_t display_store_contrast(struct device *dev,
-					struct device_attribute *attr,
-					const char *buf, size_t count)
-{
-	struct display_device *dsp = dev_get_drvdata(dev);
-	ssize_t ret = -EINVAL, size;
-	int contrast;
-	char *endp;
-
-	contrast = simple_strtoul(buf, &endp, 0);
-	size = endp - buf;
-
-	if (isspace(*endp))
-		size++;
-
-	if (size != count)
-		return ret;
-
-	mutex_lock(&dsp->lock);
-	if (likely(dsp->driver && dsp->driver->set_contrast)) {
-		pr_debug("display: set contrast to %d\n", contrast);
-		dsp->driver->set_contrast(dsp, contrast);
-		ret = count;
-	}
-	mutex_unlock(&dsp->lock);
-	return ret;
-}
-
-static ssize_t display_show_max_contrast(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	struct display_device *dsp = dev_get_drvdata(dev);
-	ssize_t rc = -ENXIO;
-
-	mutex_lock(&dsp->lock);
-	if (likely(dsp->driver))
-		rc = sprintf(buf, "%d\n", dsp->driver->max_contrast);
-	mutex_unlock(&dsp->lock);
-	return rc;
-}
-
-static struct device_attribute display_attrs[] = {
-	__ATTR(name, S_IRUGO, display_show_name, NULL),
-	__ATTR(type, S_IRUGO, display_show_type, NULL),
-	__ATTR(contrast, S_IRUGO | S_IWUSR, display_show_contrast, display_store_contrast),
-	__ATTR(max_contrast, S_IRUGO, display_show_max_contrast, NULL),
-};
-
-static int display_suspend(struct device *dev, pm_message_t state)
-{
-	struct display_device *dsp = dev_get_drvdata(dev);
-
-	mutex_lock(&dsp->lock);
-	if (likely(dsp->driver->suspend))
-		dsp->driver->suspend(dsp, state);
-	mutex_unlock(&dsp->lock);
-	return 0;
-};
-
-static int display_resume(struct device *dev)
-{
-	struct display_device *dsp = dev_get_drvdata(dev);
-
-	mutex_lock(&dsp->lock);
-	if (likely(dsp->driver->resume))
-		dsp->driver->resume(dsp);
-	mutex_unlock(&dsp->lock);
-	return 0;
-};
-
-static struct mutex allocated_dsp_lock;
-static DEFINE_IDR(allocated_dsp);
-static struct class *display_class;
-
-struct display_device *display_device_register(struct display_driver *driver,
-						struct device *parent, void *devdata)
-{
-	struct display_device *new_dev = NULL;
-	int ret = -EINVAL;
-
-	if (unlikely(!driver))
-		return ERR_PTR(ret);
-
-	mutex_lock(&allocated_dsp_lock);
-	ret = idr_pre_get(&allocated_dsp, GFP_KERNEL);
-	mutex_unlock(&allocated_dsp_lock);
-	if (!ret)
-		return ERR_PTR(ret);
-
-	new_dev = kzalloc(sizeof(struct display_device), GFP_KERNEL);
-	if (likely(new_dev) && unlikely(driver->probe(new_dev, devdata))) {
-		// Reserve the index for this display
-		mutex_lock(&allocated_dsp_lock);
-		ret = idr_get_new(&allocated_dsp, new_dev, &new_dev->idx);
-		mutex_unlock(&allocated_dsp_lock);
-
-		if (!ret) {
-			new_dev->dev = device_create(display_class, parent,
-						     MKDEV(0, 0), new_dev,
-						     "display%d", new_dev->idx);
-			if (!IS_ERR(new_dev->dev)) {
-				new_dev->parent = parent;
-				new_dev->driver = driver;
-				mutex_init(&new_dev->lock);
-				return new_dev;
-			}
-			mutex_lock(&allocated_dsp_lock);
-			idr_remove(&allocated_dsp, new_dev->idx);
-			mutex_unlock(&allocated_dsp_lock);
-			ret = -EINVAL;
-		}
-	}
-	kfree(new_dev);
-	return ERR_PTR(ret);
-}
-EXPORT_SYMBOL(display_device_register);
-
-void display_device_unregister(struct display_device *ddev)
-{
-	if (!ddev)
-		return;
-	// Free device
-	mutex_lock(&ddev->lock);
-	device_unregister(ddev->dev);
-	mutex_unlock(&ddev->lock);
-	// Mark device index as available
-	mutex_lock(&allocated_dsp_lock);
-	idr_remove(&allocated_dsp, ddev->idx);
-	mutex_unlock(&allocated_dsp_lock);
-	kfree(ddev);
-}
-EXPORT_SYMBOL(display_device_unregister);
-
-static int __init display_class_init(void)
-{
-	display_class = class_create(THIS_MODULE, "display");
-	if (IS_ERR(display_class)) {
-		printk(KERN_ERR "Failed to create display class\n");
-		display_class = NULL;
-		return -EINVAL;
-	}
-	display_class->dev_attrs = display_attrs;
-	display_class->suspend = display_suspend;
-	display_class->resume = display_resume;
-	mutex_init(&allocated_dsp_lock);
-	return 0;
-}
-
-static void __exit display_class_exit(void)
-{
-	class_destroy(display_class);
-}
-
-module_init(display_class_init);
-module_exit(display_class_exit);
-
-MODULE_DESCRIPTION("Display Hardware handling");
-MODULE_AUTHOR("James Simmons <jsimmons@infradead.org>");
-MODULE_LICENSE("GPL");
-
diff --git a/include/linux/display.h b/include/linux/display.h
deleted file mode 100644
index 3bf70d639728..000000000000
--- a/include/linux/display.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- *  Copyright (C) 2006 James Simmons <jsimmons@infradead.org>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or (at
- *  your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-
-#ifndef _LINUX_DISPLAY_H
-#define _LINUX_DISPLAY_H
-
-#include <linux/device.h>
-
-struct display_device;
-
-/* This structure defines all the properties of a Display. */
-struct display_driver {
-	int  (*set_contrast)(struct display_device *, unsigned int);
-	int  (*get_contrast)(struct display_device *);
-	void (*suspend)(struct display_device *, pm_message_t state);
-	void (*resume)(struct display_device *);
-	int  (*probe)(struct display_device *, void *);
-	int  (*remove)(struct display_device *);
-	int  max_contrast;
-};
-
-struct display_device {
-	struct module *owner;			/* Owner module */
-	struct display_driver *driver;
-	struct device *parent;			/* This is the parent */
-	struct device *dev;			/* This is this display device */
-	struct mutex lock;
-	void *priv_data;
-	char type[16];
-	char *name;
-	int idx;
-};
-
-extern struct display_device *display_device_register(struct display_driver *driver,
-					struct device *dev, void *devdata);
-extern void display_device_unregister(struct display_device *dev);
-
-extern int probe_edid(struct display_device *dev, void *devdata);
-
-#define to_display_device(obj) container_of(obj, struct display_device, class_dev)
-
-#endif
-- 
cgit v1.2.3


From 38eb6863ed21de9beab792f66cd282c21e0dc10b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 20 Oct 2011 13:42:22 +0200
Subject: zorro: Rename Picasso IV Z2 "MEM" to "RAM" for consistency

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
---
 include/linux/zorro_ids.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/zorro_ids.h b/include/linux/zorro_ids.h
index 7e749088910d..74bc53bcfdcf 100644
--- a/include/linux/zorro_ids.h
+++ b/include/linux/zorro_ids.h
@@ -360,8 +360,8 @@
 #define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_II_II_PLUS_RAM	ZORRO_ID(VILLAGE_TRONIC, 0x0B, 0)
 #define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_II_II_PLUS_REG	ZORRO_ID(VILLAGE_TRONIC, 0x0C, 0)
 #define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_II_II_PLUS_SEGMENTED_MODE	ZORRO_ID(VILLAGE_TRONIC, 0x0D, 0)
-#define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_IV_Z2_MEM1		ZORRO_ID(VILLAGE_TRONIC, 0x15, 0)
-#define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_IV_Z2_MEM2		ZORRO_ID(VILLAGE_TRONIC, 0x16, 0)
+#define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_IV_Z2_RAM1		ZORRO_ID(VILLAGE_TRONIC, 0x15, 0)
+#define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_IV_Z2_RAM2		ZORRO_ID(VILLAGE_TRONIC, 0x16, 0)
 #define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_IV_Z2_REG		ZORRO_ID(VILLAGE_TRONIC, 0x17, 0)
 #define  ZORRO_PROD_VILLAGE_TRONIC_PICASSO_IV_Z3		ZORRO_ID(VILLAGE_TRONIC, 0x18, 0)
 #define  ZORRO_PROD_VILLAGE_TRONIC_ARIADNE			ZORRO_ID(VILLAGE_TRONIC, 0xC9, 0)
-- 
cgit v1.2.3


From 1460432cb513f0c16136ed132c20ecfbf8ccf942 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 21 Oct 2011 15:56:05 -0400
Subject: iommu: Add iommu_device_group callback and iommu_group sysfs entry

An IOMMU group is a set of devices for which the IOMMU cannot
distinguish transactions.  For PCI devices, a group often occurs
when a PCI bridge is involved.  Transactions from any device
behind the bridge appear to be sourced from the bridge itself.
We leave it to the IOMMU driver to define the grouping constraints
for their platform.

Using this new interface, the group for a device can be retrieved
using the iommu_device_group() callback.  Users will compare the
value returned against the value returned for other devices to
determine whether they are part of the same group.  Devices with
no group are not translated by the IOMMU.  There should be no
expectations about the group numbers as they may be arbitrarily
assigned by the IOMMU driver and may not be persistent across boots.

We also provide a sysfs interface to the group numbers here so
that userspace can understand IOMMU dependencies between devices
for managing safe, userspace drivers.

[Some code changes by Joerg Roedel <joerg.roedel@amd.com>]

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/iommu.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/iommu.h |  7 ++++++
 2 files changed, 67 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 2fb2963df553..9c35be4b333f 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -25,8 +25,59 @@
 #include <linux/errno.h>
 #include <linux/iommu.h>
 
+static ssize_t show_iommu_group(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	unsigned int groupid;
+
+	if (iommu_device_group(dev, &groupid))
+		return 0;
+
+	return sprintf(buf, "%u", groupid);
+}
+static DEVICE_ATTR(iommu_group, S_IRUGO, show_iommu_group, NULL);
+
+static int add_iommu_group(struct device *dev, void *data)
+{
+	unsigned int groupid;
+
+	if (iommu_device_group(dev, &groupid) == 0)
+		return device_create_file(dev, &dev_attr_iommu_group);
+
+	return 0;
+}
+
+static int remove_iommu_group(struct device *dev)
+{
+	unsigned int groupid;
+
+	if (iommu_device_group(dev, &groupid) == 0)
+		device_remove_file(dev, &dev_attr_iommu_group);
+
+	return 0;
+}
+
+static int iommu_device_notifier(struct notifier_block *nb,
+				 unsigned long action, void *data)
+{
+	struct device *dev = data;
+
+	if (action == BUS_NOTIFY_ADD_DEVICE)
+		return add_iommu_group(dev, NULL);
+	else if (action == BUS_NOTIFY_DEL_DEVICE)
+		return remove_iommu_group(dev);
+
+	return 0;
+}
+
+static struct notifier_block iommu_device_nb = {
+	.notifier_call = iommu_device_notifier,
+};
+
 static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops)
 {
+	bus_register_notifier(bus, &iommu_device_nb);
+	bus_for_each_dev(bus, NULL, NULL, add_iommu_group);
 }
 
 /**
@@ -186,3 +237,12 @@ int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
 	return domain->ops->unmap(domain, iova, gfp_order);
 }
 EXPORT_SYMBOL_GPL(iommu_unmap);
+
+int iommu_device_group(struct device *dev, unsigned int *groupid)
+{
+	if (iommu_present(dev->bus) && dev->bus->iommu_ops->device_group)
+		return dev->bus->iommu_ops->device_group(dev, groupid);
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(iommu_device_group);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 432acc4c054d..93617e7779a1 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -61,6 +61,7 @@ struct iommu_ops {
 				    unsigned long iova);
 	int (*domain_has_cap)(struct iommu_domain *domain,
 			      unsigned long cap);
+	int (*device_group)(struct device *dev, unsigned int *groupid);
 };
 
 extern int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops);
@@ -81,6 +82,7 @@ extern int iommu_domain_has_cap(struct iommu_domain *domain,
 				unsigned long cap);
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
 					iommu_fault_handler_t handler);
+extern int iommu_device_group(struct device *dev, unsigned int *groupid);
 
 /**
  * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
@@ -179,6 +181,11 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain,
 {
 }
 
+static inline int iommu_device_group(struct device *dev, unsigned int *groupid);
+{
+	return -ENODEV;
+}
+
 #endif /* CONFIG_IOMMU_API */
 
 #endif /* __LINUX_IOMMU_H */
-- 
cgit v1.2.3


From 95bdaf71ccf2cb4bba0c9a3d2baea0e7916f466b Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 15 Nov 2011 12:48:29 +0100
Subject: iommu: Fix compile error with !IOMMU_API

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 include/linux/iommu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 93617e7779a1..0f318fd549be 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -181,7 +181,7 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain,
 {
 }
 
-static inline int iommu_device_group(struct device *dev, unsigned int *groupid);
+static inline int iommu_device_group(struct device *dev, unsigned int *groupid)
 {
 	return -ENODEV;
 }
-- 
cgit v1.2.3


From 8d964a2872ea0914e00bc7798e68899e01715185 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 7 Nov 2011 23:59:41 -0800
Subject: Input: samsung-keypad - enable compiling on other platforms

There is nothing in keypad platform definitions that requires
the driver be compiled on Samsung platform only, so let's move them
out of the platform subdirectory and relax the dependencies.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 arch/arm/plat-samsung/include/plat/keypad.h | 27 +-----------------
 drivers/input/keyboard/Kconfig              |  5 ++--
 drivers/input/keyboard/samsung-keypad.c     |  2 +-
 include/linux/input/samsung-keypad.h        | 43 +++++++++++++++++++++++++++++
 4 files changed, 48 insertions(+), 29 deletions(-)
 create mode 100644 include/linux/input/samsung-keypad.h

(limited to 'include/linux')

diff --git a/arch/arm/plat-samsung/include/plat/keypad.h b/arch/arm/plat-samsung/include/plat/keypad.h
index b59a6483cd8a..c81ace332a1e 100644
--- a/arch/arm/plat-samsung/include/plat/keypad.h
+++ b/arch/arm/plat-samsung/include/plat/keypad.h
@@ -13,32 +13,7 @@
 #ifndef __PLAT_SAMSUNG_KEYPAD_H
 #define __PLAT_SAMSUNG_KEYPAD_H
 
-#include <linux/input/matrix_keypad.h>
-
-#define SAMSUNG_MAX_ROWS	8
-#define SAMSUNG_MAX_COLS	8
-
-/**
- * struct samsung_keypad_platdata - Platform device data for Samsung Keypad.
- * @keymap_data: pointer to &matrix_keymap_data.
- * @rows: number of keypad row supported.
- * @cols: number of keypad col supported.
- * @no_autorepeat: disable key autorepeat.
- * @wakeup: controls whether the device should be set up as wakeup source.
- * @cfg_gpio: configure the GPIO.
- *
- * Initialisation data specific to either the machine or the platform
- * for the device driver to use or call-back when configuring gpio.
- */
-struct samsung_keypad_platdata {
-	const struct matrix_keymap_data	*keymap_data;
-	unsigned int rows;
-	unsigned int cols;
-	bool no_autorepeat;
-	bool wakeup;
-
-	void (*cfg_gpio)(unsigned int rows, unsigned int cols);
-};
+#include <linux/input/samsung-keypad.h>
 
 /**
  * samsung_keypad_set_platdata - Set platform data for Samsung Keypad device.
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 90d5f0a8f882..cdc385b2cf7d 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -441,9 +441,10 @@ config KEYBOARD_PMIC8XXX
 
 config KEYBOARD_SAMSUNG
 	tristate "Samsung keypad support"
-	depends on SAMSUNG_DEV_KEYPAD
+	depends on HAVE_CLK
 	help
-	  Say Y here if you want to use the Samsung keypad.
+	  Say Y here if you want to use the keypad on your Samsung mobile
+	  device.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called samsung-keypad.
diff --git a/drivers/input/keyboard/samsung-keypad.c b/drivers/input/keyboard/samsung-keypad.c
index d244fdf9ecdf..1a2b755564f2 100644
--- a/drivers/input/keyboard/samsung-keypad.c
+++ b/drivers/input/keyboard/samsung-keypad.c
@@ -22,7 +22,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
-#include <plat/keypad.h>
+#include <linux/input/samsung-keypad.h>
 
 #define SAMSUNG_KEYIFCON			0x00
 #define SAMSUNG_KEYIFSTSCLR			0x04
diff --git a/include/linux/input/samsung-keypad.h b/include/linux/input/samsung-keypad.h
new file mode 100644
index 000000000000..f25619bfd8a8
--- /dev/null
+++ b/include/linux/input/samsung-keypad.h
@@ -0,0 +1,43 @@
+/*
+ * Samsung Keypad platform data definitions
+ *
+ * Copyright (C) 2010 Samsung Electronics Co.Ltd
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __SAMSUNG_KEYPAD_H
+#define __SAMSUNG_KEYPAD_H
+
+#include <linux/input/matrix_keypad.h>
+
+#define SAMSUNG_MAX_ROWS	8
+#define SAMSUNG_MAX_COLS	8
+
+/**
+ * struct samsung_keypad_platdata - Platform device data for Samsung Keypad.
+ * @keymap_data: pointer to &matrix_keymap_data.
+ * @rows: number of keypad row supported.
+ * @cols: number of keypad col supported.
+ * @no_autorepeat: disable key autorepeat.
+ * @wakeup: controls whether the device should be set up as wakeup source.
+ * @cfg_gpio: configure the GPIO.
+ *
+ * Initialisation data specific to either the machine or the platform
+ * for the device driver to use or call-back when configuring gpio.
+ */
+struct samsung_keypad_platdata {
+	const struct matrix_keymap_data	*keymap_data;
+	unsigned int rows;
+	unsigned int cols;
+	bool no_autorepeat;
+	bool wakeup;
+
+	void (*cfg_gpio)(unsigned int rows, unsigned int cols);
+};
+
+#endif /* __SAMSUNG_KEYPAD_H */
-- 
cgit v1.2.3


From 1933ca8771585d43d3d2099c0c9ba7ca6b96e303 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 15 Nov 2011 15:11:44 -0800
Subject: include/linux/security.h: fix security_inode_init_security() arg

Make the security_inode_init_security() initxattrs arg const, to match the
non-stubbed version of that function.

Cc: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 19d8e04e1688..051d4e96cb1f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2044,7 +2044,7 @@ static inline void security_inode_free(struct inode *inode)
 static inline int security_inode_init_security(struct inode *inode,
 						struct inode *dir,
 						const struct qstr *qstr,
-						initxattrs initxattrs,
+						const initxattrs initxattrs,
 						void *fs_data)
 {
 	return 0;
-- 
cgit v1.2.3


From 7845bc3964756240863ae453ffe4f7ee27ddc954 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 16 Nov 2011 11:15:54 +0000
Subject: KEYS: Give key types their own lockdep class for key->sem

Give each key type its own lockdep class for key->sem, to differentiate keys of
different types in case a key of one type has to refer to a key of another type.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/key-type.h | 1 +
 security/keys/key.c      | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 9efd081bb31e..39e3c082c49d 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -92,6 +92,7 @@ struct key_type {
 
 	/* internal fields */
 	struct list_head	link;		/* link in types list */
+	struct lock_class_key	lock_class;	/* key->sem lock class */
 };
 
 extern struct key_type key_type_keyring;
diff --git a/security/keys/key.c b/security/keys/key.c
index 4414abddcb5b..4f64c7267afb 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -291,6 +291,7 @@ struct key *key_alloc(struct key_type *type, const char *desc,
 
 	atomic_set(&key->usage, 1);
 	init_rwsem(&key->sem);
+	lockdep_set_class(&key->sem, &type->lock_class);
 	key->type = type;
 	key->user = user;
 	key->quotalen = quotalen;
@@ -946,6 +947,8 @@ int register_key_type(struct key_type *ktype)
 	struct key_type *p;
 	int ret;
 
+	memset(&ktype->lock_class, 0, sizeof(ktype->lock_class));
+
 	ret = -EEXIST;
 	down_write(&key_types_sem);
 
-- 
cgit v1.2.3


From b14dab792dee3245b628e046d80a7fad5573fea6 Mon Sep 17 00:00:00 2001
From: Jassi Brar <jaswinder.singh@linaro.org>
Date: Thu, 13 Oct 2011 12:33:30 +0530
Subject: DMAEngine: Define interleaved transfer request api

Define a new api that could be used for doing fancy data transfers
like interleaved to contiguous copy and vice-versa.
Traditional SG_list based transfers tend to be very inefficient in
cases where the interleave and chunk are only a few bytes; such cases
call for a very condensed api to convey the pattern of the transfer.
This api supports all 4 variants of scatter-gather and contiguous transfer.

Of course, neither can this api help transfers that don't lend themselves
to DMA by nature, i.e., scattered tiny read/writes with no periodic pattern.

Also since now we support SLAVE channels that might not provide
device_prep_slave_sg callback but device_prep_interleaved_dma,
remove the BUG_ON check.

Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
Acked-by: Barry Song <Baohua.Song@csr.com>
[renamed dmaxfer_template to dma_interleaved_template
 did fixup after the enum dma_transfer_merge]
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 Documentation/dmaengine.txt |  8 +++++
 drivers/dma/dmaengine.c     |  4 +--
 include/linux/dmaengine.h   | 78 +++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 85 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt
index 94b7e0f96b38..bbe6cb3d1856 100644
--- a/Documentation/dmaengine.txt
+++ b/Documentation/dmaengine.txt
@@ -75,6 +75,10 @@ The slave DMA usage consists of following steps:
    slave_sg	- DMA a list of scatter gather buffers from/to a peripheral
    dma_cyclic	- Perform a cyclic DMA operation from/to a peripheral till the
 		  operation is explicitly stopped.
+   interleaved_dma - This is common to Slave as well as M2M clients. For slave
+		 address of devices' fifo could be already known to the driver.
+		 Various types of operations could be expressed by setting
+		 appropriate values to the 'dma_interleaved_template' members.
 
    A non-NULL return of this transfer API represents a "descriptor" for
    the given transaction.
@@ -89,6 +93,10 @@ The slave DMA usage consists of following steps:
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_data_direction direction);
 
+	struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
+		struct dma_chan *chan, struct dma_interleaved_template *xt,
+		unsigned long flags);
+
    The peripheral driver is expected to have mapped the scatterlist for
    the DMA operation prior to calling device_prep_slave_sg, and must
    keep the scatterlist mapped until the DMA operation has completed.
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index b48967b499da..a6c6051ec858 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -693,12 +693,12 @@ int dma_async_device_register(struct dma_device *device)
 		!device->device_prep_dma_interrupt);
 	BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) &&
 		!device->device_prep_dma_sg);
-	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
-		!device->device_prep_slave_sg);
 	BUG_ON(dma_has_cap(DMA_CYCLIC, device->cap_mask) &&
 		!device->device_prep_dma_cyclic);
 	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
 		!device->device_control);
+	BUG_ON(dma_has_cap(DMA_INTERLEAVE, device->cap_mask) &&
+		!device->device_prep_interleaved_dma);
 
 	BUG_ON(!device->device_alloc_chan_resources);
 	BUG_ON(!device->device_free_chan_resources);
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index a865b3a354cd..5532bb8b500c 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -71,10 +71,10 @@ enum dma_transaction_type {
 	DMA_ASYNC_TX,
 	DMA_SLAVE,
 	DMA_CYCLIC,
-};
-
+	DMA_INTERLEAVE,
 /* last transaction type for creation of the capabilities mask */
-#define DMA_TX_TYPE_END (DMA_CYCLIC + 1)
+	DMA_TX_TYPE_END,
+};
 
 /**
  * enum dma_transfer_direction - dma transfer mode and direction indicator
@@ -90,6 +90,74 @@ enum dma_transfer_direction {
 	DMA_DEV_TO_DEV,
 };
 
+/**
+ * Interleaved Transfer Request
+ * ----------------------------
+ * A chunk is collection of contiguous bytes to be transfered.
+ * The gap(in bytes) between two chunks is called inter-chunk-gap(ICG).
+ * ICGs may or maynot change between chunks.
+ * A FRAME is the smallest series of contiguous {chunk,icg} pairs,
+ *  that when repeated an integral number of times, specifies the transfer.
+ * A transfer template is specification of a Frame, the number of times
+ *  it is to be repeated and other per-transfer attributes.
+ *
+ * Practically, a client driver would have ready a template for each
+ *  type of transfer it is going to need during its lifetime and
+ *  set only 'src_start' and 'dst_start' before submitting the requests.
+ *
+ *
+ *  |      Frame-1        |       Frame-2       | ~ |       Frame-'numf'  |
+ *  |====....==.===...=...|====....==.===...=...| ~ |====....==.===...=...|
+ *
+ *    ==  Chunk size
+ *    ... ICG
+ */
+
+/**
+ * struct data_chunk - Element of scatter-gather list that makes a frame.
+ * @size: Number of bytes to read from source.
+ *	  size_dst := fn(op, size_src), so doesn't mean much for destination.
+ * @icg: Number of bytes to jump after last src/dst address of this
+ *	 chunk and before first src/dst address for next chunk.
+ *	 Ignored for dst(assumed 0), if dst_inc is true and dst_sgl is false.
+ *	 Ignored for src(assumed 0), if src_inc is true and src_sgl is false.
+ */
+struct data_chunk {
+	size_t size;
+	size_t icg;
+};
+
+/**
+ * struct dma_interleaved_template - Template to convey DMAC the transfer pattern
+ *	 and attributes.
+ * @src_start: Bus address of source for the first chunk.
+ * @dst_start: Bus address of destination for the first chunk.
+ * @dir: Specifies the type of Source and Destination.
+ * @src_inc: If the source address increments after reading from it.
+ * @dst_inc: If the destination address increments after writing to it.
+ * @src_sgl: If the 'icg' of sgl[] applies to Source (scattered read).
+ *		Otherwise, source is read contiguously (icg ignored).
+ *		Ignored if src_inc is false.
+ * @dst_sgl: If the 'icg' of sgl[] applies to Destination (scattered write).
+ *		Otherwise, destination is filled contiguously (icg ignored).
+ *		Ignored if dst_inc is false.
+ * @numf: Number of frames in this template.
+ * @frame_size: Number of chunks in a frame i.e, size of sgl[].
+ * @sgl: Array of {chunk,icg} pairs that make up a frame.
+ */
+struct dma_interleaved_template {
+	dma_addr_t src_start;
+	dma_addr_t dst_start;
+	enum dma_transfer_direction dir;
+	bool src_inc;
+	bool dst_inc;
+	bool src_sgl;
+	bool dst_sgl;
+	size_t numf;
+	size_t frame_size;
+	struct data_chunk sgl[0];
+};
+
 /**
  * enum dma_ctrl_flags - DMA flags to augment operation preparation,
  *  control completion, and communicate status.
@@ -445,6 +513,7 @@ struct dma_tx_state {
  * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio.
  *	The function takes a buffer of size buf_len. The callback function will
  *	be called after period_len bytes have been transferred.
+ * @device_prep_interleaved_dma: Transfer expression in a generic way.
  * @device_control: manipulate all pending operations on a channel, returns
  *	zero or error code
  * @device_tx_status: poll for transaction completion, the optional
@@ -509,6 +578,9 @@ struct dma_device {
 	struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)(
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction);
+	struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
+		struct dma_chan *chan, struct dma_interleaved_template *xt,
+		unsigned long flags);
 	int (*device_control)(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		unsigned long arg);
 
-- 
cgit v1.2.3


From ca21a146a45a179a2a7bc86d938a2fbf571a7510 Mon Sep 17 00:00:00 2001
From: Rongjun Ying <Rongjun.Ying@csr.com>
Date: Thu, 27 Oct 2011 19:22:39 -0700
Subject: dmaengine: add CSR SiRFprimaII DMAC driver

Cc: Jassi Brar <jaswinder.singh@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Rongjun Ying <rongjun.ying@csr.com>
Signed-off-by: Barry Song <Baohua.Song@csr.com>
[fixed direction enums and cyclic api based on changes
 already merged]
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 MAINTAINERS                 |   1 +
 drivers/dma/Kconfig         |   7 +
 drivers/dma/Makefile        |   1 +
 drivers/dma/sirf-dma.c      | 717 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sirfsoc_dma.h |   6 +
 5 files changed, 732 insertions(+)
 create mode 100644 drivers/dma/sirf-dma.c
 create mode 100644 include/linux/sirfsoc_dma.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 4808256446f2..1b141d71ea13 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -749,6 +749,7 @@ M:	Barry Song <baohua.song@csr.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-prima2/
+F:	drivers/dma/sirf-dma*
 
 ARM/EBSA110 MACHINE SUPPORT
 M:	Russell King <linux@arm.linux.org.uk>
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 7ec0d6cef0c3..f1a274994bb1 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -187,6 +187,13 @@ config TIMB_DMA
 	help
 	  Enable support for the Timberdale FPGA DMA engine.
 
+config SIRF_DMA
+	tristate "CSR SiRFprimaII DMA support"
+	depends on ARCH_PRIMA2
+	select DMA_ENGINE
+	help
+	  Enable support for the CSR SiRFprimaII DMA engine.
+
 config ARCH_HAS_ASYNC_TX_FIND_CHANNEL
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 30cf3b1f0c5c..009a222e8283 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
 obj-$(CONFIG_IMX_DMA) += imx-dma.o
 obj-$(CONFIG_MXS_DMA) += mxs-dma.o
 obj-$(CONFIG_TIMB_DMA) += timb_dma.o
+obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
 obj-$(CONFIG_PCH_DMA) += pch_dma.o
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
new file mode 100644
index 000000000000..55ec67997670
--- /dev/null
+++ b/drivers/dma/sirf-dma.c
@@ -0,0 +1,717 @@
+/*
+ * DMA controller driver for CSR SiRFprimaII
+ *
+ * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company.
+ *
+ * Licensed under GPLv2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/sirfsoc_dma.h>
+
+#define SIRFSOC_DMA_DESCRIPTORS                 16
+#define SIRFSOC_DMA_CHANNELS                    16
+
+#define SIRFSOC_DMA_CH_ADDR                     0x00
+#define SIRFSOC_DMA_CH_XLEN                     0x04
+#define SIRFSOC_DMA_CH_YLEN                     0x08
+#define SIRFSOC_DMA_CH_CTRL                     0x0C
+
+#define SIRFSOC_DMA_WIDTH_0                     0x100
+#define SIRFSOC_DMA_CH_VALID                    0x140
+#define SIRFSOC_DMA_CH_INT                      0x144
+#define SIRFSOC_DMA_INT_EN                      0x148
+#define SIRFSOC_DMA_CH_LOOP_CTRL                0x150
+
+#define SIRFSOC_DMA_MODE_CTRL_BIT               4
+#define SIRFSOC_DMA_DIR_CTRL_BIT                5
+
+/* the xlen and dma_width registers are programmed in units of 4-byte words */
+#define SIRFSOC_DMA_WORD_LEN			4
+
+struct sirfsoc_dma_desc {
+	struct dma_async_tx_descriptor	desc;	/* generic dmaengine part */
+	struct list_head		node;	/* link in a per-channel list */
+
+	/* SiRFprimaII 2D-DMA parameters, written to the channel registers */
+
+	int             xlen;           /* DMA xlen in words, 0 = loop mode */
+	int             ylen;           /* DMA ylen (YLEN register value) */
+	int             width;          /* DMA width in 4-byte words */
+	int             dir;            /* 1: mem-to-dev, 0: dev-to-mem */
+	bool            cyclic;         /* is loop DMA? */
+	u32             addr;		/* DMA buffer address */
+};
+
+struct sirfsoc_dma_chan {
+	struct dma_chan			chan;
+	struct list_head		free;		/* unused descs */
+	struct list_head		prepared;	/* from prep_*() */
+	struct list_head		queued;		/* after tx_submit */
+	struct list_head		active;		/* on the hardware */
+	struct list_head		completed;	/* await callback */
+	dma_cookie_t			completed_cookie; /* last retired */
+	unsigned long			happened_cyclic;  /* IRQs counted */
+	unsigned long			completed_cyclic; /* callbacks run */
+
+	/* Lock for this structure */
+	spinlock_t			lock;
+
+	int				mode;	/* 1 if src_maxburst == 4 */
+};
+
+struct sirfsoc_dma {
+	struct dma_device		dma;		/* registered in probe */
+	struct tasklet_struct		tasklet;	/* completion handling */
+	struct sirfsoc_dma_chan		channels[SIRFSOC_DMA_CHANNELS];
+	void __iomem			*base;		/* mapped registers */
+	int				irq;		/* from the DT node */
+};
+
+#define DRV_NAME	"sirfsoc_dma"
+
+/* Return the sirfsoc_dma_chan that embeds the given generic dma_chan */
+static inline
+struct sirfsoc_dma_chan *dma_chan_to_sirfsoc_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct sirfsoc_dma_chan, chan);
+}
+
+/* Return the controller owning @c; uses c->chan_id as index into channels[] */
+static inline struct sirfsoc_dma *dma_chan_to_sirfsoc_dma(struct dma_chan *c)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(c);
+	return container_of(schan, struct sirfsoc_dma, channels[c->chan_id]);
+}
+
+/* Start the first queued descriptor on the hardware; caller holds the lock */
+static void sirfsoc_dma_execute(struct sirfsoc_dma_chan *schan)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
+	int cid = schan->chan.chan_id;
+	struct sirfsoc_dma_desc *sdesc = NULL;
+
+	/*
+	 * lock has been held by functions calling this, so we don't hold
+	 * lock again
+	 */
+
+	sdesc = list_first_entry(&schan->queued, struct sirfsoc_dma_desc,
+		node);
+	/* Move the descriptor itself (not the queued list head) to active */
+	list_move_tail(&sdesc->node, &schan->active);
+
+	/* Start the DMA transfer */
+	writel_relaxed(sdesc->width, sdma->base + SIRFSOC_DMA_WIDTH_0 +
+		cid * 4);
+	writel_relaxed(cid | (schan->mode << SIRFSOC_DMA_MODE_CTRL_BIT) |
+		(sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT),
+		sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL);
+	writel_relaxed(sdesc->xlen, sdma->base + cid * 0x10 +
+		SIRFSOC_DMA_CH_XLEN);
+	writel_relaxed(sdesc->ylen, sdma->base + cid * 0x10 +
+		SIRFSOC_DMA_CH_YLEN);
+	writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) |
+		(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
+
+	/*
+	 * writel has an implicit memory write barrier to make sure data is
+	 * flushed into memory before starting DMA
+	 */
+	writel(sdesc->addr >> 2, sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR);
+
+	if (sdesc->cyclic) {
+		writel((1 << cid) | 1 << (cid + 16) |
+			readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL),
+			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+		schan->happened_cyclic = schan->completed_cyclic = 0;
+	}
+}
+
+/* Interrupt handler: ack every pending channel and retire or count its DMA */
+static irqreturn_t sirfsoc_dma_irq(int irq, void *data)
+{
+	struct sirfsoc_dma *sdma = data;
+	struct sirfsoc_dma_chan *schan;
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	u32 is;
+	int ch;
+
+	is = readl(sdma->base + SIRFSOC_DMA_CH_INT);
+	while ((ch = fls(is) - 1) >= 0) {
+		is &= ~(1 << ch);
+		writel_relaxed(1 << ch, sdma->base + SIRFSOC_DMA_CH_INT);
+		schan = &sdma->channels[ch];
+
+		spin_lock(&schan->lock);
+		/* NOTE(review): assumes schan->active is never empty here */
+		sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc,
+			node);
+		if (!sdesc->cyclic) {
+			/* Retire the finished descriptor, start the next one */
+			list_splice_tail_init(&schan->active, &schan->completed);
+			if (!list_empty(&schan->queued))
+				sirfsoc_dma_execute(schan);
+		} else
+			schan->happened_cyclic++;
+
+		spin_unlock(&schan->lock);
+	}
+
+	/* Client callbacks are run later, from the tasklet */
+	tasklet_schedule(&sdma->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/* Run callbacks for completed descriptors and for elapsed cyclic periods */
+static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma)
+{
+	dma_cookie_t last_cookie = 0;
+	struct sirfsoc_dma_chan *schan;
+	struct sirfsoc_dma_desc *sdesc;
+	struct dma_async_tx_descriptor *desc;
+	unsigned long flags;
+	unsigned long happened_cyclic;
+	LIST_HEAD(list);
+	int i;
+
+	for (i = 0; i < sdma->dma.chancnt; i++) {
+		schan = &sdma->channels[i];
+
+		/* Get all completed descriptors */
+		spin_lock_irqsave(&schan->lock, flags);
+		if (!list_empty(&schan->completed)) {
+			list_splice_tail_init(&schan->completed, &list);
+			spin_unlock_irqrestore(&schan->lock, flags);
+
+			/* Execute callbacks and run dependencies */
+			list_for_each_entry(sdesc, &list, node) {
+				desc = &sdesc->desc;
+
+				if (desc->callback)
+					desc->callback(desc->callback_param);
+
+				last_cookie = desc->cookie;
+				dma_run_dependencies(desc);
+			}
+
+			/* Free descriptors */
+			spin_lock_irqsave(&schan->lock, flags);
+			list_splice_tail_init(&list, &schan->free);
+			schan->completed_cookie = last_cookie;
+			spin_unlock_irqrestore(&schan->lock, flags);
+		} else {
+			/* list_first_entry() is never NULL: test for empty */
+			sdesc = list_empty(&schan->active) ? NULL :
+				list_first_entry(&schan->active,
+					struct sirfsoc_dma_desc, node);
+			if (!sdesc || !sdesc->cyclic) {
+				/* without active cyclic DMA */
+				spin_unlock_irqrestore(&schan->lock, flags);
+				continue;
+			}
+
+			/* cyclic DMA */
+			happened_cyclic = schan->happened_cyclic;
+			spin_unlock_irqrestore(&schan->lock, flags);
+
+			desc = &sdesc->desc;
+			while (happened_cyclic != schan->completed_cyclic) {
+				if (desc->callback)
+					desc->callback(desc->callback_param);
+				schan->completed_cyclic++;
+			}
+		}
+	}
+}
+
+/* DMA tasklet, scheduled by the IRQ handler to run completion callbacks */
+static void sirfsoc_dma_tasklet(unsigned long data)
+{
+	struct sirfsoc_dma *sdma = (void *)data;
+
+	sirfsoc_dma_process_completed(sdma);
+}
+
+/* Queue a descriptor and assign its cookie; the hardware is not started here */
+static dma_cookie_t sirfsoc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(txd->chan);
+	struct sirfsoc_dma_desc *sdesc;
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	sdesc = container_of(txd, struct sirfsoc_dma_desc, desc);
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	/* Move descriptor to queue */
+	list_move_tail(&sdesc->node, &schan->queued);
+
+	/* Update cookie, wrapping back to 1 so that it stays positive */
+	cookie = schan->chan.cookie + 1;
+	if (cookie <= 0)
+		cookie = 1;
+
+	schan->chan.cookie = cookie;
+	sdesc->desc.cookie = cookie;
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return cookie;
+}
+
+static int sirfsoc_dma_slave_config(struct sirfsoc_dma_chan *schan,
+	struct dma_slave_config *config)
+{
+	unsigned long flags;
+
+	if ((config->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) ||
+		(config->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES))
+		return -EINVAL;
+	/* mode (CH_CTRL bit 4) is set only when src_maxburst is 4 */
+	spin_lock_irqsave(&schan->lock, flags);
+	schan->mode = (config->src_maxburst == 4 ? 1 : 0);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return 0;
+}
+
+static int sirfsoc_dma_terminate_all(struct sirfsoc_dma_chan *schan)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
+	int cid = schan->chan.chan_id;
+	unsigned long flags;
+
+	writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) &
+		~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
+	writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID);
+	/* clear both loop-control bits (cid and cid + 16) of this channel */
+	writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL)
+		& ~((1 << cid) | 1 << (cid + 16)),
+			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+	/* hand the active and queued descriptors back to the free list */
+	spin_lock_irqsave(&schan->lock, flags);
+	list_splice_tail_init(&schan->active, &schan->free);
+	list_splice_tail_init(&schan->queued, &schan->free);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return 0;
+}
+
+static int sirfsoc_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+	unsigned long arg)
+{
+	struct dma_slave_config *config;
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		return sirfsoc_dma_terminate_all(schan);
+	case DMA_SLAVE_CONFIG:
+		config = (struct dma_slave_config *)arg;
+		return sirfsoc_dma_slave_config(schan, config);
+
+	default:
+		break;
+	}
+	/* commands other than the two above are not implemented */
+	return -ENOSYS;
+}
+
+/* Alloc channel resources; returns the number of descriptors allocated */
+static int sirfsoc_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc;
+	unsigned long flags;
+	LIST_HEAD(descs);
+	int i;
+
+	/* Alloc up to SIRFSOC_DMA_DESCRIPTORS descriptors on a local list */
+	for (i = 0; i < SIRFSOC_DMA_DESCRIPTORS; i++) {
+		sdesc = kzalloc(sizeof(*sdesc), GFP_KERNEL);
+		if (!sdesc) {
+			dev_notice(sdma->dma.dev, "Memory allocation error. "
+				"Allocated only %u descriptors\n", i);
+			break;
+		}
+
+		dma_async_tx_descriptor_init(&sdesc->desc, chan);
+		sdesc->desc.flags = DMA_CTRL_ACK;
+		sdesc->desc.tx_submit = sirfsoc_dma_tx_submit;
+
+		list_add_tail(&sdesc->node, &descs);
+	}
+
+	/* Return error only if no descriptors were allocated */
+	if (i == 0)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	list_splice_tail_init(&descs, &schan->free);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return i;
+}
+
+/* Free every descriptor on the free list; the channel must already be idle */
+static void sirfsoc_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc, *tmp;
+	unsigned long flags;
+	LIST_HEAD(descs);
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	/* Channel must be idle */
+	BUG_ON(!list_empty(&schan->prepared));
+	BUG_ON(!list_empty(&schan->queued));
+	BUG_ON(!list_empty(&schan->active));
+	BUG_ON(!list_empty(&schan->completed));
+
+	/* Detach the free list so that kfree() runs without the lock held */
+	list_splice_tail_init(&schan->free, &descs);
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	/* Free descriptors */
+	list_for_each_entry_safe(sdesc, tmp, &descs, node)
+		kfree(sdesc);
+}
+
+/* Start the next queued descriptor, but only if nothing is active already */
+static void sirfsoc_dma_issue_pending(struct dma_chan *chan)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	if (list_empty(&schan->active) && !list_empty(&schan->queued))
+		sirfsoc_dma_execute(schan);
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+}
+
+/* Report whether @cookie is done, from the last completed and used cookies */
+static enum dma_status
+sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+	struct dma_tx_state *txstate)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	unsigned long flags;
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+
+	spin_lock_irqsave(&schan->lock, flags);
+	last_used = schan->chan.cookie;
+	last_complete = schan->completed_cookie;
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	dma_set_tx_state(txstate, last_complete, last_used, 0);
+	return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+/* Prepare a one-chunk-per-frame 2D transfer; NULL if no descriptor is free */
+static struct dma_async_tx_descriptor *sirfsoc_dma_prep_interleaved(
+	struct dma_chan *chan, struct dma_interleaved_template *xt,
+	unsigned long flags)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	unsigned long iflags;
+	int ret;
+
+	if ((xt->dir != DMA_MEM_TO_DEV) && (xt->dir != DMA_DEV_TO_MEM)) {
+		ret = -EINVAL;
+		goto err_dir;
+	}
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&schan->lock, iflags);
+	if (!list_empty(&schan->free)) {
+		sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc,
+			node);
+		list_del(&sdesc->node);
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	if (!sdesc) {
+		/* try to free completed descriptors */
+		sirfsoc_dma_process_completed(sdma);
+		return NULL;
+	}
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&schan->lock, iflags);
+	/*
+	 * Number of chunks in a frame can only be 1 for prima2
+	 * and ylen (number of frame - 1) must be at least 0
+	 */
+	if ((xt->frame_size == 1) && (xt->numf > 0)) {
+		sdesc->cyclic = 0;
+		sdesc->xlen = xt->sgl[0].size / SIRFSOC_DMA_WORD_LEN;
+		sdesc->width = (xt->sgl[0].size + xt->sgl[0].icg) /
+				SIRFSOC_DMA_WORD_LEN;
+		sdesc->ylen = xt->numf - 1;
+		if (xt->dir == DMA_MEM_TO_DEV) {
+			sdesc->addr = xt->src_start;
+			sdesc->dir = 1;
+		} else {
+			sdesc->addr = xt->dst_start;
+			sdesc->dir = 0;
+		}
+
+		list_add_tail(&sdesc->node, &schan->prepared);
+	} else {
+		pr_err("sirfsoc DMA Invalid xfer\n");
+		/* give the unused descriptor back instead of leaking it */
+		list_add_tail(&sdesc->node, &schan->free);
+		ret = -EINVAL;
+		goto err_xfer;
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	return &sdesc->desc;
+err_xfer:
+	spin_unlock_irqrestore(&schan->lock, iflags);
+err_dir:
+	return ERR_PTR(ret);
+}
+
+/* Prepare a two-period loop-mode transfer; NULL if no descriptor is free */
+static struct dma_async_tx_descriptor *
+sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr,
+	size_t buf_len, size_t period_len,
+	enum dma_transfer_direction direction)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	unsigned long iflags;
+
+	/*
+	 * we only support cycle transfer with 2 period
+	 * If the X-length is set to 0, it would be the loop mode.
+	 * The DMA address keeps increasing until reaching the end of a loop
+	 * area whose size is defined by (DMA_WIDTH x (Y_LENGTH + 1)). Then
+	 * the DMA address goes back to the beginning of this area.
+	 * In loop mode, the DMA data region is divided into two parts, BUFA
+	 * and BUFB. DMA controller generates interrupts twice in each loop:
+	 * when the DMA address reaches the end of BUFA or the end of the
+	 * BUFB
+	 */
+	if (buf_len != 2 * period_len)
+		return ERR_PTR(-EINVAL);
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&schan->lock, iflags);
+	if (!list_empty(&schan->free)) {
+		sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc,
+			node);
+		list_del(&sdesc->node);
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+	if (!sdesc)
+		return NULL;
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&schan->lock, iflags);
+	sdesc->addr = addr;
+	sdesc->cyclic = 1;
+	sdesc->xlen = 0;
+	sdesc->ylen = buf_len / SIRFSOC_DMA_WORD_LEN - 1;
+	sdesc->width = 1;
+	list_add_tail(&sdesc->node, &schan->prepared);
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	return &sdesc->desc;
+}
+
+/*
+ * Each of the 16 channels of a controller is allocated to a different function.
+ * Match when @chan_id equals dev_id * SIRFSOC_DMA_CHANNELS + chan->chan_id.
+ */
+bool sirfsoc_dma_filter_id(struct dma_chan *chan, void *chan_id)
+{
+	unsigned int ch_nr = (unsigned int) chan_id;
+
+	if (ch_nr == chan->chan_id +
+		chan->device->dev_id * SIRFSOC_DMA_CHANNELS)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(sirfsoc_dma_filter_id);
+
+/* Probe one DMAC: map its IRQ and registers, init the channels, register it */
+static int __devinit sirfsoc_dma_probe(struct platform_device *op)
+{
+	struct device_node *dn = op->dev.of_node;
+	struct device *dev = &op->dev;
+	struct dma_device *dma;
+	struct sirfsoc_dma *sdma;
+	struct sirfsoc_dma_chan *schan;
+	struct resource res;
+	ulong regs_start, regs_size;
+	u32 id;
+	int ret, i;
+
+	sdma = devm_kzalloc(dev, sizeof(*sdma), GFP_KERNEL);
+	if (!sdma) {
+		dev_err(dev, "Memory exhausted!\n");
+		return -ENOMEM;
+	}
+
+	if (of_property_read_u32(dn, "cell-index", &id)) {
+		dev_err(dev, "Fail to get DMAC index\n");
+		ret = -ENODEV;
+		goto free_mem;
+	}
+
+	sdma->irq = irq_of_parse_and_map(dn, 0);
+	if (sdma->irq == NO_IRQ) {
+		dev_err(dev, "Error mapping IRQ!\n");
+		ret = -EINVAL;
+		goto free_mem;
+	}
+
+	ret = of_address_to_resource(dn, 0, &res);
+	if (ret) {
+		dev_err(dev, "Error parsing memory region!\n");
+		goto irq_dispose;
+	}
+
+	regs_start = res.start;
+	regs_size = resource_size(&res);
+
+	sdma->base = devm_ioremap(dev, regs_start, regs_size);
+	if (!sdma->base) {
+		dev_err(dev, "Error mapping memory region!\n");
+		ret = -ENOMEM;
+		goto irq_dispose;
+	}
+
+	ret = devm_request_irq(dev, sdma->irq, &sirfsoc_dma_irq, 0, DRV_NAME,
+		sdma);
+	if (ret) {
+		dev_err(dev, "Error requesting IRQ!\n");
+		ret = -EINVAL;
+		goto unmap_mem;
+	}
+
+	dma = &sdma->dma;
+	dma->dev = dev;
+	dma->chancnt = SIRFSOC_DMA_CHANNELS;
+	dma->device_alloc_chan_resources = sirfsoc_dma_alloc_chan_resources;
+	dma->device_free_chan_resources = sirfsoc_dma_free_chan_resources;
+	dma->device_issue_pending = sirfsoc_dma_issue_pending;
+	dma->device_control = sirfsoc_dma_control;
+	dma->device_tx_status = sirfsoc_dma_tx_status;
+	dma->device_prep_interleaved_dma = sirfsoc_dma_prep_interleaved;
+	dma->device_prep_dma_cyclic = sirfsoc_dma_prep_cyclic;
+
+	INIT_LIST_HEAD(&dma->channels);
+	dma_cap_set(DMA_SLAVE, dma->cap_mask);
+	dma_cap_set(DMA_CYCLIC, dma->cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, dma->cap_mask);
+	dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+
+	for (i = 0; i < dma->chancnt; i++) {
+		schan = &sdma->channels[i];
+
+		schan->chan.device = dma;
+		schan->chan.cookie = 1;
+		schan->completed_cookie = schan->chan.cookie;
+
+		INIT_LIST_HEAD(&schan->free);
+		INIT_LIST_HEAD(&schan->prepared);
+		INIT_LIST_HEAD(&schan->queued);
+		INIT_LIST_HEAD(&schan->active);
+		INIT_LIST_HEAD(&schan->completed);
+
+		spin_lock_init(&schan->lock);
+		list_add_tail(&schan->chan.device_node, &dma->channels);
+	}
+
+	tasklet_init(&sdma->tasklet, sirfsoc_dma_tasklet, (unsigned long)sdma);
+
+	/* Register DMA engine */
+	dev_set_drvdata(dev, sdma);
+	ret = dma_async_device_register(dma);
+	if (ret)
+		goto free_irq;
+
+	dev_info(dev, "initialized SIRFSOC DMAC driver\n");
+
+	return 0;
+	/* Unwind strictly in the reverse order of acquisition */
+free_irq:
+	devm_free_irq(dev, sdma->irq, sdma);
+unmap_mem:
+	devm_iounmap(dev, sdma->base);
+irq_dispose:
+	irq_dispose_mapping(sdma->irq);
+free_mem:
+	devm_kfree(dev, sdma);
+	return ret;
+}
+
+static int __devexit sirfsoc_dma_remove(struct platform_device *op)
+{
+	struct device *dev = &op->dev;
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+
+	dma_async_device_unregister(&sdma->dma);
+	devm_free_irq(dev, sdma->irq, sdma);
+	irq_dispose_mapping(sdma->irq);
+	devm_iounmap(dev, sdma->base);	/* pairs with devm_ioremap() in probe */
+	devm_kfree(dev, sdma);
+	return 0;
+}
+
+static const struct of_device_id sirfsoc_dma_match[] = {
+	{ .compatible = "sirf,prima2-dmac", },
+	{ /* sentinel */ },
+};
+
+static struct platform_driver sirfsoc_dma_driver = {
+	.probe		= sirfsoc_dma_probe,
+	.remove		= __devexit_p(sirfsoc_dma_remove),
+	.driver = {
+		.name = DRV_NAME,
+		.owner = THIS_MODULE,
+		.of_match_table	= sirfsoc_dma_match,	/* device tree match */
+	},
+};
+
+static int __init sirfsoc_dma_init(void)
+{
+	return platform_driver_register(&sirfsoc_dma_driver);
+}
+module_init(sirfsoc_dma_init);
+
+static void __exit sirfsoc_dma_exit(void)
+{
+	platform_driver_unregister(&sirfsoc_dma_driver);
+}
+module_exit(sirfsoc_dma_exit);
+
+MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>, "
+	"Barry Song <baohua.song@csr.com>");
+MODULE_DESCRIPTION("SIRFSOC DMA control driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/sirfsoc_dma.h b/include/linux/sirfsoc_dma.h
new file mode 100644
index 000000000000..29d959333d81
--- /dev/null
+++ b/include/linux/sirfsoc_dma.h
@@ -0,0 +1,6 @@
+#ifndef _SIRFSOC_DMA_H_
+#define _SIRFSOC_DMA_H_
+
+bool sirfsoc_dma_filter_id(struct dma_chan *chan, void *chan_id);
+
+#endif
-- 
cgit v1.2.3


From 56242a1fc595d158eddefbb4d6d76e82c2535f55 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 21 Nov 2011 21:33:18 -0800
Subject: sh: clkfwk: setup clock parent from current register value

Some clocks can select their parent clock through a CPG register,
but that selection might have been modified by the boot loader.
This patch removes the fixed initial parent clock
and sets the parent up from the current register settings instead.
It works on div6 reparent clocks for now.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/arm/mach-shmobile/clock-sh7372.c  |  6 +++---
 arch/sh/kernel/cpu/sh4a/clock-sh7724.c |  4 ++--
 drivers/sh/clk/cpg.c                   | 35 ++++++++++++++++++++++++++++++++++
 include/linux/sh_clk.h                 |  9 ++++++---
 4 files changed, 46 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c
index 995a9c3aec8f..e349c22a0d71 100644
--- a/arch/arm/mach-shmobile/clock-sh7372.c
+++ b/arch/arm/mach-shmobile/clock-sh7372.c
@@ -411,11 +411,11 @@ static struct clk *fsibckcr_parent[] = {
 };
 
 static struct clk div6_reparent_clks[DIV6_REPARENT_NR] = {
-	[DIV6_HDMI] = SH_CLK_DIV6_EXT(&pllc1_div2_clk, HDMICKCR, 0,
+	[DIV6_HDMI] = SH_CLK_DIV6_EXT(HDMICKCR, 0,
 				      hdmi_parent, ARRAY_SIZE(hdmi_parent), 6, 2),
-	[DIV6_FSIA] = SH_CLK_DIV6_EXT(&pllc1_div2_clk, FSIACKCR, 0,
+	[DIV6_FSIA] = SH_CLK_DIV6_EXT(FSIACKCR, 0,
 				      fsiackcr_parent, ARRAY_SIZE(fsiackcr_parent), 6, 2),
-	[DIV6_FSIB] = SH_CLK_DIV6_EXT(&pllc1_div2_clk, FSIBCKCR, 0,
+	[DIV6_FSIB] = SH_CLK_DIV6_EXT(FSIBCKCR, 0,
 				      fsibckcr_parent, ARRAY_SIZE(fsibckcr_parent), 6, 2),
 };
 
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
index 8668f557e0ac..77118387f1cf 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
@@ -189,9 +189,9 @@ static struct clk *fclkbcr_parent[] = {
 };
 
 static struct clk div6_reparent_clks[DIV6_REPARENT_NR] = {
-	[DIV6_FA] = SH_CLK_DIV6_EXT(&div3_clk, FCLKACR, 0,
+	[DIV6_FA] = SH_CLK_DIV6_EXT(FCLKACR, 0,
 				      fclkacr_parent, ARRAY_SIZE(fclkacr_parent), 6, 2),
-	[DIV6_FB] = SH_CLK_DIV6_EXT(&div3_clk, FCLKBCR, 0,
+	[DIV6_FB] = SH_CLK_DIV6_EXT(FCLKBCR, 0,
 				      fclkbcr_parent, ARRAY_SIZE(fclkbcr_parent), 6, 2),
 };
 
diff --git a/drivers/sh/clk/cpg.c b/drivers/sh/clk/cpg.c
index 82dd6fb17838..5e4301b936e7 100644
--- a/drivers/sh/clk/cpg.c
+++ b/drivers/sh/clk/cpg.c
@@ -167,6 +167,38 @@ static struct clk_ops sh_clk_div6_reparent_clk_ops = {
 	.set_parent	= sh_clk_div6_set_parent,
 };
 
+static int __init sh_clk_init_parent(struct clk *clk)
+{
+	u32 val;
+
+	if (clk->parent)
+		return 0;
+
+	if (!clk->parent_table || !clk->parent_num)
+		return 0;
+
+	if (!clk->src_width) {
+		pr_err("sh_clk_init_parent: cannot select parent clock\n");
+		return -EINVAL;
+	}
+	/* parent index: src_width bits of enable_reg, starting at src_shift */
+	val  = (__raw_readl(clk->enable_reg) >> clk->src_shift);
+	val &= (1 << clk->src_width) - 1;
+
+	if (val >= clk->parent_num) {
+		pr_err("sh_clk_init_parent: parent table size failed\n");
+		return -EINVAL;
+	}
+
+	clk->parent = clk->parent_table[val];
+	if (!clk->parent) {
+		pr_err("sh_clk_init_parent: unable to set parent\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int __init sh_clk_div6_register_ops(struct clk *clks, int nr,
 					   struct clk_ops *ops)
 {
@@ -190,6 +222,9 @@ static int __init sh_clk_div6_register_ops(struct clk *clks, int nr,
 		clkp->ops = ops;
 		clkp->freq_table = freq_table + (k * freq_table_size);
 		clkp->freq_table[nr_divs].frequency = CPUFREQ_TABLE_END;
+		ret = sh_clk_init_parent(clkp);
+		if (ret < 0)
+			break;
 
 		ret = clk_register(clkp);
 	}
diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h
index a20831cf336a..e834304c0b6a 100644
--- a/include/linux/sh_clk.h
+++ b/include/linux/sh_clk.h
@@ -131,10 +131,9 @@ int sh_clk_div4_enable_register(struct clk *clks, int nr,
 int sh_clk_div4_reparent_register(struct clk *clks, int nr,
 			 struct clk_div4_table *table);
 
-#define SH_CLK_DIV6_EXT(_parent, _reg, _flags, _parents,	\
+#define SH_CLK_DIV6_EXT(_reg, _flags, _parents,			\
 			_num_parents, _src_shift, _src_width)	\
 {								\
-	.parent = _parent,					\
 	.enable_reg = (void __iomem *)_reg,			\
 	.flags = _flags,					\
 	.parent_table = _parents,				\
@@ -144,7 +143,11 @@ int sh_clk_div4_reparent_register(struct clk *clks, int nr,
 }
 
 #define SH_CLK_DIV6(_parent, _reg, _flags)			\
-	SH_CLK_DIV6_EXT(_parent, _reg, _flags, NULL, 0, 0, 0)
+{								\
+	.parent		= _parent,				\
+	.enable_reg	= (void __iomem *)_reg,			\
+	.flags		= _flags,				\
+}
 
 int sh_clk_div6_register(struct clk *clks, int nr);
 int sh_clk_div6_reparent_register(struct clk *clks, int nr);
-- 
cgit v1.2.3


From 92de378b739115c8afaae5cd3f25159406bb9914 Mon Sep 17 00:00:00 2001
From: Philip Rakity <prakity@marvell.com>
Date: Fri, 25 Nov 2011 23:19:37 +0400
Subject: max8925_power: No temperature interrupts if temperature not connected

Brownstone does not have temperature reading circuit hooked up.
This leads to spurious interrupts.

Allow the platform layer to indicate no temperature circuit
and do not activate interrupts if no temperature control is set

Signed-off-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/max8925_power.c | 12 ++++++++----
 include/linux/mfd/max8925.h   |  1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/max8925_power.c b/drivers/power/max8925_power.c
index b16bd71f24fb..83b827cf004b 100644
--- a/drivers/power/max8925_power.c
+++ b/drivers/power/max8925_power.c
@@ -78,6 +78,7 @@ struct max8925_power_info {
 	unsigned		batt_detect:1;	/* detecing MB by ID pin */
 	unsigned		topoff_threshold:2;
 	unsigned		fast_charge:3;
+	unsigned		no_temp_support:1;
 
 	int (*set_charger) (int);
 };
@@ -116,7 +117,7 @@ static irqreturn_t max8925_charger_handler(int irq, void *data)
 	case MAX8925_IRQ_VCHG_DC_F:
 		info->ac_online = 0;
 		__set_charger(info, 0);
-		dev_dbg(chip->dev, "Adapter is removal\n");
+		dev_dbg(chip->dev, "Adapter removed\n");
 		break;
 	case MAX8925_IRQ_VCHG_USB_R:
 		info->usb_online = 1;
@@ -126,7 +127,7 @@ static irqreturn_t max8925_charger_handler(int irq, void *data)
 	case MAX8925_IRQ_VCHG_USB_F:
 		info->usb_online = 0;
 		__set_charger(info, 0);
-		dev_dbg(chip->dev, "USB is removal\n");
+		dev_dbg(chip->dev, "USB removed\n");
 		break;
 	case MAX8925_IRQ_VCHG_THM_OK_F:
 		/* Battery is not ready yet */
@@ -369,8 +370,10 @@ static __devinit int max8925_init_charger(struct max8925_chip *chip,
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_OVP, "usb-ovp");
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_F, "usb-remove");
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_R, "usb-insert");
-	REQUEST_IRQ(MAX8925_IRQ_VCHG_THM_OK_R, "batt-temp-in-range");
-	REQUEST_IRQ(MAX8925_IRQ_VCHG_THM_OK_F, "batt-temp-out-range");
+	if (!info->no_temp_support) {
+		REQUEST_IRQ(MAX8925_IRQ_VCHG_THM_OK_R, "batt-temp-in-range");
+		REQUEST_IRQ(MAX8925_IRQ_VCHG_THM_OK_F, "batt-temp-out-range");
+	}
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_SYSLOW_F, "vsys-high");
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_SYSLOW_R, "vsys-low");
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_RST, "charger-reset");
@@ -477,6 +480,7 @@ static __devinit int max8925_power_probe(struct platform_device *pdev)
 	info->topoff_threshold = pdata->topoff_threshold;
 	info->fast_charge = pdata->fast_charge;
 	info->set_charger = pdata->set_charger;
+	info->no_temp_support = pdata->no_temp_support;
 
 	max8925_init_charger(chip, info);
 	return 0;
diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index 5259dfe8c585..69ec8f0bd490 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h
@@ -223,6 +223,7 @@ struct max8925_power_pdata {
 	unsigned	batt_detect:1;
 	unsigned	topoff_threshold:2;
 	unsigned	fast_charge:3;	/* charge current */
+	unsigned	no_temp_support:1; /* set if no temperature detect */
 };
 
 /*
-- 
cgit v1.2.3


From 5ba1fa0ae288e93179d54e3c59b2241eb1709f0c Mon Sep 17 00:00:00 2001
From: Philip Rakity <prakity@marvell.com>
Date: Fri, 25 Nov 2011 23:24:03 +0400
Subject: max8925_power: Do not detect ac insert if handled by other code

On brownstone rev 4, ac-insert detect is handled by vbus.

Allow the platform code to disable ac-insert detection
by setting no_insert_detect.

Signed-off-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/max8925_power.c | 8 ++++++--
 include/linux/mfd/max8925.h   | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/max8925_power.c b/drivers/power/max8925_power.c
index 83b827cf004b..be2d563cb315 100644
--- a/drivers/power/max8925_power.c
+++ b/drivers/power/max8925_power.c
@@ -79,6 +79,7 @@ struct max8925_power_info {
 	unsigned		topoff_threshold:2;
 	unsigned		fast_charge:3;
 	unsigned		no_temp_support:1;
+	unsigned		no_insert_detect:1;
 
 	int (*set_charger) (int);
 };
@@ -365,8 +366,10 @@ static __devinit int max8925_init_charger(struct max8925_chip *chip,
 	int ret;
 
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_DC_OVP, "ac-ovp");
-	REQUEST_IRQ(MAX8925_IRQ_VCHG_DC_F, "ac-remove");
-	REQUEST_IRQ(MAX8925_IRQ_VCHG_DC_R, "ac-insert");
+	if (!info->no_insert_detect) {
+		REQUEST_IRQ(MAX8925_IRQ_VCHG_DC_F, "ac-remove");
+		REQUEST_IRQ(MAX8925_IRQ_VCHG_DC_R, "ac-insert");
+	}
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_OVP, "usb-ovp");
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_F, "usb-remove");
 	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_R, "usb-insert");
@@ -481,6 +484,7 @@ static __devinit int max8925_power_probe(struct platform_device *pdev)
 	info->fast_charge = pdata->fast_charge;
 	info->set_charger = pdata->set_charger;
 	info->no_temp_support = pdata->no_temp_support;
+	info->no_insert_detect = pdata->no_insert_detect;
 
 	max8925_init_charger(chip, info);
 	return 0;
diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index 69ec8f0bd490..e742e044e2eb 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h
@@ -224,6 +224,7 @@ struct max8925_power_pdata {
 	unsigned	topoff_threshold:2;
 	unsigned	fast_charge:3;	/* charge current */
 	unsigned	no_temp_support:1; /* set if no temperature detect */
+	unsigned	no_insert_detect:1; /* set if no ac insert detect */
 };
 
 /*
-- 
cgit v1.2.3


From 72af5a4b9cc9c4527f2967e0283bee632237c26e Mon Sep 17 00:00:00 2001
From: Philip Rakity <prakity@marvell.com>
Date: Fri, 25 Nov 2011 23:11:06 +0400
Subject: max8925_power: Remove support for irq bits that do not exist
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The max8925 cannot return usb status.  The bits

       [MAX8925_IRQ_VCHG_USB_OVP] = {
               .reg            = MAX8925_CHG_IRQ1,
               .mask_reg       = MAX8925_CHG_IRQ1_MASK,
               .offs           = 1 << 3,
       },
       [MAX8925_IRQ_VCHG_USB_F] =  {
               .reg            = MAX8925_CHG_IRQ1,
               .mask_reg       = MAX8925_CHG_IRQ1_MASK,
               .offs           = 1 << 4,
       },
       [MAX8925_IRQ_VCHG_USB_R] = {
               .reg            = MAX8925_CHG_IRQ1,
               .mask_reg       = MAX8925_CHG_IRQ1_MASK,
               .offs           = 1 << 5,
       },

do not exist in the irq register.

Signed-off-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/mfd/max8925-core.c    | 15 ---------------
 drivers/power/max8925_power.c | 13 -------------
 include/linux/mfd/max8925.h   |  3 ---
 3 files changed, 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c
index e1e59c92f758..ca881efedf75 100644
--- a/drivers/mfd/max8925-core.c
+++ b/drivers/mfd/max8925-core.c
@@ -210,21 +210,6 @@ static struct max8925_irq_data max8925_irqs[] = {
 		.mask_reg	= MAX8925_CHG_IRQ1_MASK,
 		.offs		= 1 << 2,
 	},
-	[MAX8925_IRQ_VCHG_USB_OVP] = {
-		.reg		= MAX8925_CHG_IRQ1,
-		.mask_reg	= MAX8925_CHG_IRQ1_MASK,
-		.offs		= 1 << 3,
-	},
-	[MAX8925_IRQ_VCHG_USB_F] =  {
-		.reg		= MAX8925_CHG_IRQ1,
-		.mask_reg	= MAX8925_CHG_IRQ1_MASK,
-		.offs		= 1 << 4,
-	},
-	[MAX8925_IRQ_VCHG_USB_R] = {
-		.reg		= MAX8925_CHG_IRQ1,
-		.mask_reg	= MAX8925_CHG_IRQ1_MASK,
-		.offs		= 1 << 5,
-	},
 	[MAX8925_IRQ_VCHG_THM_OK_R] = {
 		.reg		= MAX8925_CHG_IRQ2,
 		.mask_reg	= MAX8925_CHG_IRQ2_MASK,
diff --git a/drivers/power/max8925_power.c b/drivers/power/max8925_power.c
index be2d563cb315..cbc7a0b6da52 100644
--- a/drivers/power/max8925_power.c
+++ b/drivers/power/max8925_power.c
@@ -120,16 +120,6 @@ static irqreturn_t max8925_charger_handler(int irq, void *data)
 		__set_charger(info, 0);
 		dev_dbg(chip->dev, "Adapter removed\n");
 		break;
-	case MAX8925_IRQ_VCHG_USB_R:
-		info->usb_online = 1;
-		__set_charger(info, 1);
-		dev_dbg(chip->dev, "USB inserted\n");
-		break;
-	case MAX8925_IRQ_VCHG_USB_F:
-		info->usb_online = 0;
-		__set_charger(info, 0);
-		dev_dbg(chip->dev, "USB removed\n");
-		break;
 	case MAX8925_IRQ_VCHG_THM_OK_F:
 		/* Battery is not ready yet */
 		dev_dbg(chip->dev, "Battery temperature is out of range\n");
@@ -370,9 +360,6 @@ static __devinit int max8925_init_charger(struct max8925_chip *chip,
 		REQUEST_IRQ(MAX8925_IRQ_VCHG_DC_F, "ac-remove");
 		REQUEST_IRQ(MAX8925_IRQ_VCHG_DC_R, "ac-insert");
 	}
-	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_OVP, "usb-ovp");
-	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_F, "usb-remove");
-	REQUEST_IRQ(MAX8925_IRQ_VCHG_USB_R, "usb-insert");
 	if (!info->no_temp_support) {
 		REQUEST_IRQ(MAX8925_IRQ_VCHG_THM_OK_R, "batt-temp-in-range");
 		REQUEST_IRQ(MAX8925_IRQ_VCHG_THM_OK_F, "batt-temp-out-range");
diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index e742e044e2eb..10aeaf8bfb94 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h
@@ -167,9 +167,6 @@ enum {
 	MAX8925_IRQ_VCHG_DC_OVP,
 	MAX8925_IRQ_VCHG_DC_F,
 	MAX8925_IRQ_VCHG_DC_R,
-	MAX8925_IRQ_VCHG_USB_OVP,
-	MAX8925_IRQ_VCHG_USB_F,
-	MAX8925_IRQ_VCHG_USB_R,
 	MAX8925_IRQ_VCHG_THM_OK_R,
 	MAX8925_IRQ_VCHG_THM_OK_F,
 	MAX8925_IRQ_VCHG_SYSLOW_F,
-- 
cgit v1.2.3


From e7a5f6d55991fb3b3214f435681ee2db96320395 Mon Sep 17 00:00:00 2001
From: Philip Rakity <prakity@marvell.com>
Date: Mon, 29 Aug 2011 09:32:04 -0700
Subject: max8925_power: Enable power change notifications

The power core infrastructure allows external power change
events to be passed to the drivers that are listed in the
supplied_to call back field.  Enable this feature by
allowing the supplied_to field to be passed to the driver.

This feature will enable drivers named in the supplied_to
field that have an external_power_changed callback to be
notified when power has been turned on or off.

Signed-off-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/max8925_power.c | 5 +++++
 include/linux/mfd/max8925.h   | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/power/max8925_power.c b/drivers/power/max8925_power.c
index cbc7a0b6da52..377d1e633b4a 100644
--- a/drivers/power/max8925_power.c
+++ b/drivers/power/max8925_power.c
@@ -441,6 +441,8 @@ static __devinit int max8925_power_probe(struct platform_device *pdev)
 	info->ac.properties = max8925_ac_props;
 	info->ac.num_properties = ARRAY_SIZE(max8925_ac_props);
 	info->ac.get_property = max8925_ac_get_prop;
+	info->ac.supplied_to = pdata->supplied_to;
+	info->ac.num_supplicants = pdata->num_supplicants;
 	ret = power_supply_register(&pdev->dev, &info->ac);
 	if (ret)
 		goto out;
@@ -451,6 +453,9 @@ static __devinit int max8925_power_probe(struct platform_device *pdev)
 	info->usb.properties = max8925_usb_props;
 	info->usb.num_properties = ARRAY_SIZE(max8925_usb_props);
 	info->usb.get_property = max8925_usb_get_prop;
+	info->usb.supplied_to = pdata->supplied_to;
+	info->usb.num_supplicants = pdata->num_supplicants;
+
 	ret = power_supply_register(&pdev->dev, &info->usb);
 	if (ret)
 		goto out_usb;
diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index 10aeaf8bfb94..b8e6d9449086 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h
@@ -222,6 +222,8 @@ struct max8925_power_pdata {
 	unsigned	fast_charge:3;	/* charge current */
 	unsigned	no_temp_support:1; /* set if no temperature detect */
 	unsigned	no_insert_detect:1; /* set if no ac insert detect */
+	char		**supplied_to;
+	int		num_supplicants;
 };
 
 /*
-- 
cgit v1.2.3


From 40216ce7aa88c2e70869723a0f5929fdbd4a91c5 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Mon, 28 Nov 2011 09:44:17 +0100
Subject: ASoC: Move SigmaDSP firmware loader to ASoC

It has been pointed out previously, that the firmware subsystem is not the right
place for the SigmaDSP firmware loader. Furthermore the SigmaDSP is currently
only used in audio products and we are aiming for better integration into the
ASoC framework in the future, with support for ALSA controls for firmware
parameters and support for dynamic power management as well. So the natural
choice for the SigmaDSP firmware loader is the ASoC subsystem.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 MAINTAINERS                 |   1 +
 drivers/firmware/Kconfig    |  12 ----
 drivers/firmware/Makefile   |   1 -
 drivers/firmware/sigma.c    | 153 -------------------------------------------
 include/linux/sigma.h       |  55 ----------------
 sound/soc/codecs/Kconfig    |   6 +-
 sound/soc/codecs/Makefile   |   2 +
 sound/soc/codecs/adau1701.c |   2 +-
 sound/soc/codecs/sigmadsp.c | 154 ++++++++++++++++++++++++++++++++++++++++++++
 sound/soc/codecs/sigmadsp.h |  55 ++++++++++++++++
 10 files changed, 218 insertions(+), 223 deletions(-)
 delete mode 100644 drivers/firmware/sigma.c
 delete mode 100644 include/linux/sigma.h
 create mode 100644 sound/soc/codecs/sigmadsp.c
 create mode 100644 sound/soc/codecs/sigmadsp.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index fd7e441b5ea7..6a93a930ec66 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -542,6 +542,7 @@ F:	sound/soc/codecs/adau*
 F:	sound/soc/codecs/adav*
 F:	sound/soc/codecs/ad1*
 F:	sound/soc/codecs/ssm*
+F:	sound/soc/codecs/sigmadsp.*
 
 ANALOG DEVICES INC ASOC DRIVERS
 L:	uclinux-dist-devel@blackfin.uclinux.org
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index efba163595db..9b00072a020f 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -145,18 +145,6 @@ config ISCSI_IBFT
 	  detect iSCSI boot parameters dynamically during system boot, say Y.
 	  Otherwise, say N.
 
-config SIGMA
-	tristate "SigmaStudio firmware loader"
-	depends on I2C
-	select CRC32
-	default n
-	help
-	  Enable helper functions for working with Analog Devices SigmaDSP
-	  parts and binary firmwares produced by Analog Devices SigmaStudio.
-
-	  If unsure, say N here.  Drivers that need these helpers will select
-	  this option automatically.
-
 source "drivers/firmware/google/Kconfig"
 
 endmenu
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 47338c979126..5a7e27399729 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -12,6 +12,5 @@ obj-$(CONFIG_DMIID)		+= dmi-id.o
 obj-$(CONFIG_ISCSI_IBFT_FIND)	+= iscsi_ibft_find.o
 obj-$(CONFIG_ISCSI_IBFT)	+= iscsi_ibft.o
 obj-$(CONFIG_FIRMWARE_MEMMAP)	+= memmap.o
-obj-$(CONFIG_SIGMA)		+= sigma.o
 
 obj-$(CONFIG_GOOGLE_FIRMWARE)	+= google/
diff --git a/drivers/firmware/sigma.c b/drivers/firmware/sigma.c
deleted file mode 100644
index 1eedb6f7fdab..000000000000
--- a/drivers/firmware/sigma.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Load Analog Devices SigmaStudio firmware files
- *
- * Copyright 2009-2011 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#include <linux/crc32.h>
-#include <linux/delay.h>
-#include <linux/firmware.h>
-#include <linux/kernel.h>
-#include <linux/i2c.h>
-#include <linux/module.h>
-#include <linux/sigma.h>
-
-static size_t sigma_action_size(struct sigma_action *sa)
-{
-	size_t payload = 0;
-
-	switch (sa->instr) {
-	case SIGMA_ACTION_WRITEXBYTES:
-	case SIGMA_ACTION_WRITESINGLE:
-	case SIGMA_ACTION_WRITESAFELOAD:
-		payload = sigma_action_len(sa);
-		break;
-	default:
-		break;
-	}
-
-	payload = ALIGN(payload, 2);
-
-	return payload + sizeof(struct sigma_action);
-}
-
-/*
- * Returns a negative error value in case of an error, 0 if processing of
- * the firmware should be stopped after this action, 1 otherwise.
- */
-static int
-process_sigma_action(struct i2c_client *client, struct sigma_action *sa)
-{
-	size_t len = sigma_action_len(sa);
-	int ret;
-
-	pr_debug("%s: instr:%i addr:%#x len:%zu\n", __func__,
-		sa->instr, sa->addr, len);
-
-	switch (sa->instr) {
-	case SIGMA_ACTION_WRITEXBYTES:
-	case SIGMA_ACTION_WRITESINGLE:
-	case SIGMA_ACTION_WRITESAFELOAD:
-		ret = i2c_master_send(client, (void *)&sa->addr, len);
-		if (ret < 0)
-			return -EINVAL;
-		break;
-	case SIGMA_ACTION_DELAY:
-		udelay(len);
-		len = 0;
-		break;
-	case SIGMA_ACTION_END:
-		return 0;
-	default:
-		return -EINVAL;
-	}
-
-	return 1;
-}
-
-static int
-process_sigma_actions(struct i2c_client *client, struct sigma_firmware *ssfw)
-{
-	struct sigma_action *sa;
-	size_t size;
-	int ret;
-
-	while (ssfw->pos + sizeof(*sa) <= ssfw->fw->size) {
-		sa = (struct sigma_action *)(ssfw->fw->data + ssfw->pos);
-
-		size = sigma_action_size(sa);
-		ssfw->pos += size;
-		if (ssfw->pos > ssfw->fw->size || size == 0)
-			break;
-
-		ret = process_sigma_action(client, sa);
-
-		pr_debug("%s: action returned %i\n", __func__, ret);
-
-		if (ret <= 0)
-			return ret;
-	}
-
-	if (ssfw->pos != ssfw->fw->size)
-		return -EINVAL;
-
-	return 0;
-}
-
-int process_sigma_firmware(struct i2c_client *client, const char *name)
-{
-	int ret;
-	struct sigma_firmware_header *ssfw_head;
-	struct sigma_firmware ssfw;
-	const struct firmware *fw;
-	u32 crc;
-
-	pr_debug("%s: loading firmware %s\n", __func__, name);
-
-	/* first load the blob */
-	ret = request_firmware(&fw, name, &client->dev);
-	if (ret) {
-		pr_debug("%s: request_firmware() failed with %i\n", __func__, ret);
-		return ret;
-	}
-	ssfw.fw = fw;
-
-	/* then verify the header */
-	ret = -EINVAL;
-
-	/*
-	 * Reject too small or unreasonable large files. The upper limit has been
-	 * chosen a bit arbitrarily, but it should be enough for all practical
-	 * purposes and having the limit makes it easier to avoid integer
-	 * overflows later in the loading process.
-	 */
-	if (fw->size < sizeof(*ssfw_head) || fw->size >= 0x4000000)
-		goto done;
-
-	ssfw_head = (void *)fw->data;
-	if (memcmp(ssfw_head->magic, SIGMA_MAGIC, ARRAY_SIZE(ssfw_head->magic)))
-		goto done;
-
-	crc = crc32(0, fw->data + sizeof(*ssfw_head),
-			fw->size - sizeof(*ssfw_head));
-	pr_debug("%s: crc=%x\n", __func__, crc);
-	if (crc != le32_to_cpu(ssfw_head->crc))
-		goto done;
-
-	ssfw.pos = sizeof(*ssfw_head);
-
-	/* finally process all of the actions */
-	ret = process_sigma_actions(client, &ssfw);
-
- done:
-	release_firmware(fw);
-
-	pr_debug("%s: loaded %s\n", __func__, name);
-
-	return ret;
-}
-EXPORT_SYMBOL(process_sigma_firmware);
-
-MODULE_LICENSE("GPL");
diff --git a/include/linux/sigma.h b/include/linux/sigma.h
deleted file mode 100644
index d0de882c0d96..000000000000
--- a/include/linux/sigma.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Load firmware files from Analog Devices SigmaStudio
- *
- * Copyright 2009-2011 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#ifndef __SIGMA_FIRMWARE_H__
-#define __SIGMA_FIRMWARE_H__
-
-#include <linux/firmware.h>
-#include <linux/types.h>
-
-struct i2c_client;
-
-#define SIGMA_MAGIC "ADISIGM"
-
-struct sigma_firmware {
-	const struct firmware *fw;
-	size_t pos;
-};
-
-struct sigma_firmware_header {
-	unsigned char magic[7];
-	u8 version;
-	__le32 crc;
-};
-
-enum {
-	SIGMA_ACTION_WRITEXBYTES = 0,
-	SIGMA_ACTION_WRITESINGLE,
-	SIGMA_ACTION_WRITESAFELOAD,
-	SIGMA_ACTION_DELAY,
-	SIGMA_ACTION_PLLWAIT,
-	SIGMA_ACTION_NOOP,
-	SIGMA_ACTION_END,
-};
-
-struct sigma_action {
-	u8 instr;
-	u8 len_hi;
-	__le16 len;
-	__be16 addr;
-	unsigned char payload[];
-};
-
-static inline u32 sigma_action_len(struct sigma_action *sa)
-{
-	return (sa->len_hi << 16) | le16_to_cpu(sa->len);
-}
-
-extern int process_sigma_firmware(struct i2c_client *client, const char *name);
-
-#endif
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 686f45a07f34..593174c78d7b 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -141,7 +141,7 @@ config SND_SOC_AD73311
 	tristate
 
 config SND_SOC_ADAU1701
-	select SIGMA
+	select SND_SOC_SIGMADSP
 	tristate
 
 config SND_SOC_ADAU1373
@@ -234,6 +234,10 @@ config SND_SOC_RT5631
 config SND_SOC_SGTL5000
 	tristate
 
+config SND_SOC_SIGMADSP
+	tristate
+	select CRC32
+
 config SND_SOC_SN95031
 	tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index 62b01e4e7983..fa15006fcac5 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -33,6 +33,7 @@ snd-soc-rt5631-objs := rt5631.o
 snd-soc-sgtl5000-objs := sgtl5000.o
 snd-soc-alc5623-objs := alc5623.o
 snd-soc-alc5632-objs := alc5632.o
+snd-soc-sigmadsp-objs := sigmadsp.o
 snd-soc-sn95031-objs := sn95031.o
 snd-soc-spdif-objs := spdif_transciever.o
 snd-soc-ssm2602-objs := ssm2602.o
@@ -134,6 +135,7 @@ obj-$(CONFIG_SND_SOC_MAX9850)	+= snd-soc-max9850.o
 obj-$(CONFIG_SND_SOC_PCM3008)	+= snd-soc-pcm3008.o
 obj-$(CONFIG_SND_SOC_RT5631)	+= snd-soc-rt5631.o
 obj-$(CONFIG_SND_SOC_SGTL5000)  += snd-soc-sgtl5000.o
+obj-$(CONFIG_SND_SOC_SIGMADSP)	+= snd-soc-sigmadsp.o
 obj-$(CONFIG_SND_SOC_SN95031)	+=snd-soc-sn95031.o
 obj-$(CONFIG_SND_SOC_SPDIF)	+= snd-soc-spdif.o
 obj-$(CONFIG_SND_SOC_SSM2602)	+= snd-soc-ssm2602.o
diff --git a/sound/soc/codecs/adau1701.c b/sound/soc/codecs/adau1701.c
index 8b7e1c50d6e9..6a6af567f02a 100644
--- a/sound/soc/codecs/adau1701.c
+++ b/sound/soc/codecs/adau1701.c
@@ -12,13 +12,13 @@
 #include <linux/init.h>
 #include <linux/i2c.h>
 #include <linux/delay.h>
-#include <linux/sigma.h>
 #include <linux/slab.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
 #include <sound/soc.h>
 
+#include "sigmadsp.h"
 #include "adau1701.h"
 
 #define ADAU1701_DSPCTRL	0x1c
diff --git a/sound/soc/codecs/sigmadsp.c b/sound/soc/codecs/sigmadsp.c
new file mode 100644
index 000000000000..acb97a9834aa
--- /dev/null
+++ b/sound/soc/codecs/sigmadsp.c
@@ -0,0 +1,154 @@
+/*
+ * Load Analog Devices SigmaStudio firmware files
+ *
+ * Copyright 2009-2011 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/crc32.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/kernel.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+
+#include "sigmadsp.h"
+
+static size_t sigma_action_size(struct sigma_action *sa)
+{
+	size_t payload = 0;
+
+	switch (sa->instr) {
+	case SIGMA_ACTION_WRITEXBYTES:
+	case SIGMA_ACTION_WRITESINGLE:
+	case SIGMA_ACTION_WRITESAFELOAD:
+		payload = sigma_action_len(sa);
+		break;
+	default:
+		break;
+	}
+
+	payload = ALIGN(payload, 2);
+
+	return payload + sizeof(struct sigma_action);
+}
+
+/*
+ * Returns a negative error value in case of an error, 0 if processing of
+ * the firmware should be stopped after this action, 1 otherwise.
+ */
+static int
+process_sigma_action(struct i2c_client *client, struct sigma_action *sa)
+{
+	size_t len = sigma_action_len(sa);
+	int ret;
+
+	pr_debug("%s: instr:%i addr:%#x len:%zu\n", __func__,
+		sa->instr, sa->addr, len);
+
+	switch (sa->instr) {
+	case SIGMA_ACTION_WRITEXBYTES:
+	case SIGMA_ACTION_WRITESINGLE:
+	case SIGMA_ACTION_WRITESAFELOAD:
+		ret = i2c_master_send(client, (void *)&sa->addr, len);
+		if (ret < 0)
+			return -EINVAL;
+		break;
+	case SIGMA_ACTION_DELAY:
+		udelay(len);
+		len = 0;
+		break;
+	case SIGMA_ACTION_END:
+		return 0;
+	default:
+		return -EINVAL;
+	}
+
+	return 1;
+}
+
+static int
+process_sigma_actions(struct i2c_client *client, struct sigma_firmware *ssfw)
+{
+	struct sigma_action *sa;
+	size_t size;
+	int ret;
+
+	while (ssfw->pos + sizeof(*sa) <= ssfw->fw->size) {
+		sa = (struct sigma_action *)(ssfw->fw->data + ssfw->pos);
+
+		size = sigma_action_size(sa);
+		ssfw->pos += size;
+		if (ssfw->pos > ssfw->fw->size || size == 0)
+			break;
+
+		ret = process_sigma_action(client, sa);
+
+		pr_debug("%s: action returned %i\n", __func__, ret);
+
+		if (ret <= 0)
+			return ret;
+	}
+
+	if (ssfw->pos != ssfw->fw->size)
+		return -EINVAL;
+
+	return 0;
+}
+
+int process_sigma_firmware(struct i2c_client *client, const char *name)
+{
+	int ret;
+	struct sigma_firmware_header *ssfw_head;
+	struct sigma_firmware ssfw;
+	const struct firmware *fw;
+	u32 crc;
+
+	pr_debug("%s: loading firmware %s\n", __func__, name);
+
+	/* first load the blob */
+	ret = request_firmware(&fw, name, &client->dev);
+	if (ret) {
+		pr_debug("%s: request_firmware() failed with %i\n", __func__, ret);
+		return ret;
+	}
+	ssfw.fw = fw;
+
+	/* then verify the header */
+	ret = -EINVAL;
+
+	/*
+	 * Reject too small or unreasonable large files. The upper limit has been
+	 * chosen a bit arbitrarily, but it should be enough for all practical
+	 * purposes and having the limit makes it easier to avoid integer
+	 * overflows later in the loading process.
+	 */
+	if (fw->size < sizeof(*ssfw_head) || fw->size >= 0x4000000)
+		goto done;
+
+	ssfw_head = (void *)fw->data;
+	if (memcmp(ssfw_head->magic, SIGMA_MAGIC, ARRAY_SIZE(ssfw_head->magic)))
+		goto done;
+
+	crc = crc32(0, fw->data + sizeof(*ssfw_head),
+			fw->size - sizeof(*ssfw_head));
+	pr_debug("%s: crc=%x\n", __func__, crc);
+	if (crc != le32_to_cpu(ssfw_head->crc))
+		goto done;
+
+	ssfw.pos = sizeof(*ssfw_head);
+
+	/* finally process all of the actions */
+	ret = process_sigma_actions(client, &ssfw);
+
+ done:
+	release_firmware(fw);
+
+	pr_debug("%s: loaded %s\n", __func__, name);
+
+	return ret;
+}
+EXPORT_SYMBOL(process_sigma_firmware);
+
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/sigmadsp.h b/sound/soc/codecs/sigmadsp.h
new file mode 100644
index 000000000000..d0de882c0d96
--- /dev/null
+++ b/sound/soc/codecs/sigmadsp.h
@@ -0,0 +1,55 @@
+/*
+ * Load firmware files from Analog Devices SigmaStudio
+ *
+ * Copyright 2009-2011 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef __SIGMA_FIRMWARE_H__
+#define __SIGMA_FIRMWARE_H__
+
+#include <linux/firmware.h>
+#include <linux/types.h>
+
+struct i2c_client;
+
+#define SIGMA_MAGIC "ADISIGM"
+
+struct sigma_firmware {
+	const struct firmware *fw;
+	size_t pos;
+};
+
+struct sigma_firmware_header {
+	unsigned char magic[7];
+	u8 version;
+	__le32 crc;
+};
+
+enum {
+	SIGMA_ACTION_WRITEXBYTES = 0,
+	SIGMA_ACTION_WRITESINGLE,
+	SIGMA_ACTION_WRITESAFELOAD,
+	SIGMA_ACTION_DELAY,
+	SIGMA_ACTION_PLLWAIT,
+	SIGMA_ACTION_NOOP,
+	SIGMA_ACTION_END,
+};
+
+struct sigma_action {
+	u8 instr;
+	u8 len_hi;
+	__le16 len;
+	__be16 addr;
+	unsigned char payload[];
+};
+
+static inline u32 sigma_action_len(struct sigma_action *sa)
+{
+	return (sa->len_hi << 16) | le16_to_cpu(sa->len);
+}
+
+extern int process_sigma_firmware(struct i2c_client *client, const char *name);
+
+#endif
-- 
cgit v1.2.3


From 3bfd5c5baf66e975b0f365a0cda8d75bf2953ebe Mon Sep 17 00:00:00 2001
From: Heiko Stübner <heiko@sntech.de>
Date: Tue, 29 Nov 2011 11:04:09 -0800
Subject: Input: add generic GPIO-tilt driver

There exist tilt switches that simply report their tilt-state via
some gpios. The number and orientation of their axes can vary
depending on the switch used and the build of the device. Also two
or more one-axis switches could be combined to provide multi-dimensional
orientation.

One example of a device using such a switch is the family of Qisda
ebook readers, where the switch provides information about the
landscape / portrait orientation of the device. The example in
Documentation/input/gpio-tilt.txt documents exactly this one-axis
device.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 Documentation/input/gpio-tilt.txt     | 103 ++++++++++++++++
 drivers/input/misc/Kconfig            |  14 +++
 drivers/input/misc/Makefile           |   1 +
 drivers/input/misc/gpio_tilt_polled.c | 213 ++++++++++++++++++++++++++++++++++
 include/linux/input/gpio_tilt.h       |  73 ++++++++++++
 5 files changed, 404 insertions(+)
 create mode 100644 Documentation/input/gpio-tilt.txt
 create mode 100644 drivers/input/misc/gpio_tilt_polled.c
 create mode 100644 include/linux/input/gpio_tilt.h

(limited to 'include/linux')

diff --git a/Documentation/input/gpio-tilt.txt b/Documentation/input/gpio-tilt.txt
new file mode 100644
index 000000000000..06d60c3ff5e7
--- /dev/null
+++ b/Documentation/input/gpio-tilt.txt
@@ -0,0 +1,103 @@
+Driver for tilt-switches connected via GPIOs
+============================================
+
+Generic driver to read data from tilt switches connected via gpios.
+Orientation can be provided by one or more tilt switches,
+i.e. each tilt switch providing one axis, and the number of axes
+is also not limited.
+
+
+Data structures:
+----------------
+
+The array of struct gpio in the gpios field is used to list the gpios
+that represent the current tilt state.
+
+The array of struct gpio_tilt_axis describes the axes that are reported
+to the input system. The values set therein are used for the
+input_set_abs_params calls needed to init the axes.
+
+The array of struct gpio_tilt_state maps gpio states to the corresponding
+values to report. The gpio state is represented as a bitfield where the
+bit-index corresponds to the index of the gpio in the struct gpio array.
+In the same manner the values stored in the axes array correspond to
+the elements of the gpio_tilt_axis-array.
+
+
+Example:
+--------
+
+Example configuration for a single TS1003 tilt switch that rotates around
+one axis in 4 steps and emits the current tilt via two GPIOs.
+
+static int sg060_tilt_enable(struct device *dev) {
+	/* code to enable the sensors */
+};
+
+static void sg060_tilt_disable(struct device *dev) {
+	/* code to disable the sensors */
+};
+
+static struct gpio sg060_tilt_gpios[] = {
+	{ SG060_TILT_GPIO_SENSOR1, GPIOF_IN, "tilt_sensor1" },
+	{ SG060_TILT_GPIO_SENSOR2, GPIOF_IN, "tilt_sensor2" },
+};
+
+static struct gpio_tilt_state sg060_tilt_states[] = {
+	{
+		.gpios = (0 << 1) | (0 << 0),
+		.axes = (int[]) {
+			0,
+		},
+	}, {
+		.gpios = (0 << 1) | (1 << 0),
+		.axes = (int[]) {
+			1, /* 90 degrees */
+		},
+	}, {
+		.gpios = (1 << 1) | (1 << 0),
+		.axes = (int[]) {
+			2, /* 180 degrees */
+		},
+	}, {
+		.gpios = (1 << 1) | (0 << 0),
+		.axes = (int[]) {
+			3, /* 270 degrees */
+		},
+	},
+};
+
+static struct gpio_tilt_axis sg060_tilt_axes[] = {
+	{
+		.axis = ABS_RY,
+		.min = 0,
+		.max = 3,
+		.fuzz = 0,
+		.flat = 0,
+	},
+};
+
+static struct gpio_tilt_platform_data sg060_tilt_pdata= {
+	.gpios = sg060_tilt_gpios,
+	.nr_gpios = ARRAY_SIZE(sg060_tilt_gpios),
+
+	.axes = sg060_tilt_axes,
+	.nr_axes = ARRAY_SIZE(sg060_tilt_axes),
+
+	.states = sg060_tilt_states,
+	.nr_states = ARRAY_SIZE(sg060_tilt_states),
+
+	.debounce_interval = 100,
+
+	.poll_interval = 1000,
+	.enable = sg060_tilt_enable,
+	.disable = sg060_tilt_disable,
+};
+
+static struct platform_device sg060_device_tilt = {
+	.name = "gpio-tilt-polled",
+	.id = -1,
+	.dev = {
+		.platform_data = &sg060_tilt_pdata,
+	},
+};
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 22d875fde53a..e53b443d1e33 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -179,6 +179,20 @@ config INPUT_APANEL
 	 To compile this driver as a module, choose M here: the module will
 	 be called apanel.
 
+config INPUT_GPIO_TILT_POLLED
+	tristate "Polled GPIO tilt switch"
+	depends on GENERIC_GPIO
+	select INPUT_POLLDEV
+	help
+	  This driver implements support for tilt switches connected
+	  to GPIO pins that are not capable of generating interrupts.
+
+	  The list of gpios to use and the mapping of their states
+	  to specific angles is done via platform data.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called gpio_tilt_polled.
+
 config INPUT_IXP4XX_BEEPER
 	tristate "IXP4XX Beeper support"
 	depends on ARCH_IXP4XX
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index a244fc6a781c..90070c1a4ad3 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_INPUT_CMA3000_I2C)		+= cma3000_d0x_i2c.o
 obj-$(CONFIG_INPUT_COBALT_BTNS)		+= cobalt_btns.o
 obj-$(CONFIG_INPUT_DM355EVM)		+= dm355evm_keys.o
 obj-$(CONFIG_HP_SDC_RTC)		+= hp_sdc_rtc.o
+obj-$(CONFIG_INPUT_GPIO_TILT_POLLED)	+= gpio_tilt_polled.o
 obj-$(CONFIG_INPUT_IXP4XX_BEEPER)	+= ixp4xx-beeper.o
 obj-$(CONFIG_INPUT_KEYSPAN_REMOTE)	+= keyspan_remote.o
 obj-$(CONFIG_INPUT_KXTJ9)		+= kxtj9.o
diff --git a/drivers/input/misc/gpio_tilt_polled.c b/drivers/input/misc/gpio_tilt_polled.c
new file mode 100644
index 000000000000..277a0574c199
--- /dev/null
+++ b/drivers/input/misc/gpio_tilt_polled.c
@@ -0,0 +1,213 @@
+/*
+ *  Driver for tilt switches connected via GPIO lines
+ *  not capable of generating interrupts
+ *
+ *  Copyright (C) 2011 Heiko Stuebner <heiko@sntech.de>
+ *
+ *  based on: drivers/input/keyboard/gpio_keys_polled.c
+ *
+ *  Copyright (C) 2007-2010 Gabor Juhos <juhosg@openwrt.org>
+ *  Copyright (C) 2010 Nuno Goncalves <nunojpg@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/input.h>
+#include <linux/input-polldev.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/input/gpio_tilt.h>
+
+#define DRV_NAME	"gpio-tilt-polled"
+
+struct gpio_tilt_polled_dev {
+	struct input_polled_dev *poll_dev;
+	struct device *dev;
+	const struct gpio_tilt_platform_data *pdata;
+
+	int last_state;
+
+	int threshold;
+	int count;
+};
+
+static void gpio_tilt_polled_poll(struct input_polled_dev *dev)
+{
+	struct gpio_tilt_polled_dev *tdev = dev->private;
+	const struct gpio_tilt_platform_data *pdata = tdev->pdata;
+	struct input_dev *input = dev->input;
+	struct gpio_tilt_state *tilt_state = NULL;
+	int state, i;
+
+	if (tdev->count < tdev->threshold) {
+		tdev->count++;
+	} else {
+		state = 0;
+		for (i = 0; i < pdata->nr_gpios; i++)
+			state |= (!!gpio_get_value(pdata->gpios[i].gpio) << i);
+
+		if (state != tdev->last_state) {
+			for (i = 0; i < pdata->nr_states; i++)
+				if (pdata->states[i].gpios == state)
+					tilt_state = &pdata->states[i];
+
+			if (tilt_state) {
+				for (i = 0; i < pdata->nr_axes; i++)
+					input_report_abs(input,
+							 pdata->axes[i].axis,
+							 tilt_state->axes[i]);
+
+				input_sync(input);
+			}
+
+			tdev->count = 0;
+			tdev->last_state = state;
+		}
+	}
+}
+
+static void gpio_tilt_polled_open(struct input_polled_dev *dev)
+{
+	struct gpio_tilt_polled_dev *tdev = dev->private;
+	const struct gpio_tilt_platform_data *pdata = tdev->pdata;
+
+	if (pdata->enable)
+		pdata->enable(tdev->dev);
+
+	/* report initial state of the axes */
+	tdev->last_state = -1;
+	tdev->count = tdev->threshold;
+	gpio_tilt_polled_poll(tdev->poll_dev);
+}
+
+static void gpio_tilt_polled_close(struct input_polled_dev *dev)
+{
+	struct gpio_tilt_polled_dev *tdev = dev->private;
+	const struct gpio_tilt_platform_data *pdata = tdev->pdata;
+
+	if (pdata->disable)
+		pdata->disable(tdev->dev);
+}
+
+static int __devinit gpio_tilt_polled_probe(struct platform_device *pdev)
+{
+	const struct gpio_tilt_platform_data *pdata = pdev->dev.platform_data;
+	struct device *dev = &pdev->dev;
+	struct gpio_tilt_polled_dev *tdev;
+	struct input_polled_dev *poll_dev;
+	struct input_dev *input;
+	int error, i;
+
+	if (!pdata || !pdata->poll_interval)
+		return -EINVAL;
+
+	tdev = kzalloc(sizeof(struct gpio_tilt_polled_dev), GFP_KERNEL);
+	if (!tdev) {
+		dev_err(dev, "no memory for private data\n");
+		return -ENOMEM;
+	}
+
+	error = gpio_request_array(pdata->gpios, pdata->nr_gpios);
+	if (error) {
+		dev_err(dev,
+			"Could not request tilt GPIOs: %d\n", error);
+		goto err_free_tdev;
+	}
+
+	poll_dev = input_allocate_polled_device();
+	if (!poll_dev) {
+		dev_err(dev, "no memory for polled device\n");
+		error = -ENOMEM;
+		goto err_free_gpios;
+	}
+
+	poll_dev->private = tdev;
+	poll_dev->poll = gpio_tilt_polled_poll;
+	poll_dev->poll_interval = pdata->poll_interval;
+	poll_dev->open = gpio_tilt_polled_open;
+	poll_dev->close = gpio_tilt_polled_close;
+
+	input = poll_dev->input;
+
+	input->name = pdev->name;
+	input->phys = DRV_NAME"/input0";
+	input->dev.parent = &pdev->dev;
+
+	input->id.bustype = BUS_HOST;
+	input->id.vendor = 0x0001;
+	input->id.product = 0x0001;
+	input->id.version = 0x0100;
+
+	__set_bit(EV_ABS, input->evbit);
+	for (i = 0; i < pdata->nr_axes; i++)
+		input_set_abs_params(input, pdata->axes[i].axis,
+				     pdata->axes[i].min, pdata->axes[i].max,
+				     pdata->axes[i].fuzz, pdata->axes[i].flat);
+
+	tdev->threshold = DIV_ROUND_UP(pdata->debounce_interval,
+				       pdata->poll_interval);
+
+	tdev->poll_dev = poll_dev;
+	tdev->dev = dev;
+	tdev->pdata = pdata;
+
+	error = input_register_polled_device(poll_dev);
+	if (error) {
+		dev_err(dev, "unable to register polled device, err=%d\n",
+			error);
+		goto err_free_polldev;
+	}
+
+	platform_set_drvdata(pdev, tdev);
+
+	return 0;
+
+err_free_polldev:
+	input_free_polled_device(poll_dev);
+err_free_gpios:
+	gpio_free_array(pdata->gpios, pdata->nr_gpios);
+err_free_tdev:
+	kfree(tdev);
+
+	return error;
+}
+
+static int __devexit gpio_tilt_polled_remove(struct platform_device *pdev)
+{
+	struct gpio_tilt_polled_dev *tdev = platform_get_drvdata(pdev);
+	const struct gpio_tilt_platform_data *pdata = tdev->pdata;
+
+	platform_set_drvdata(pdev, NULL);
+
+	input_unregister_polled_device(tdev->poll_dev);
+	input_free_polled_device(tdev->poll_dev);
+
+	gpio_free_array(pdata->gpios, pdata->nr_gpios);
+
+	kfree(tdev);
+
+	return 0;
+}
+
+static struct platform_driver gpio_tilt_polled_driver = {
+	.probe	= gpio_tilt_polled_probe,
+	.remove	= __devexit_p(gpio_tilt_polled_remove),
+	.driver	= {
+		.name	= DRV_NAME,
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_platform_driver(gpio_tilt_polled_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Heiko Stuebner <heiko@sntech.de>");
+MODULE_DESCRIPTION("Polled GPIO tilt driver");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/include/linux/input/gpio_tilt.h b/include/linux/input/gpio_tilt.h
new file mode 100644
index 000000000000..c1cc52d380e0
--- /dev/null
+++ b/include/linux/input/gpio_tilt.h
@@ -0,0 +1,73 @@
+#ifndef _INPUT_GPIO_TILT_H
+#define _INPUT_GPIO_TILT_H
+
+/**
+ * struct gpio_tilt_axis - Axis used by the tilt switch
+ * @axis:		Constant describing the axis, e.g. ABS_X
+ * @min:		minimum value for abs_param
+ * @max:		maximum value for abs_param
+ * @fuzz:		fuzz value for abs_param
+ * @flat:		flat value for abs_param
+ */
+struct gpio_tilt_axis {
+	int axis;
+	int min;
+	int max;
+	int fuzz;
+	int flat;
+};
+
+/**
+ * struct gpio_tilt_state - state description
+ * @gpios:		bitfield of gpio target-states for the value
+ * @axes:		array containing the axes settings for the gpio state
+ *			The array indices must correspond to the axes defined
+ *			in platform_data
+ *
+ * This structure describes one supported set of axis values
+ * and the gpio-state which represents it.
+ *
+ * The n-th bit in the bitfield describes the state of the n-th GPIO
+ * from the gpios-array defined in gpio_tilt_platform_data below.
+ */
+struct gpio_tilt_state {
+	int gpios;
+	int *axes;
+};
+
+/**
+ * struct gpio_tilt_platform_data - gpio-tilt-switch configuration
+ * @gpios:		Array containing the gpios determining the tilt state
+ * @nr_gpios:		Number of gpios
+ * @axes:		Array of gpio_tilt_axis descriptions
+ * @nr_axes:		Number of axes
+ * @states:		Array of gpio_tilt_state entries describing
+ *			the gpio state for specific tilts
+ * @nr_states:		Number of states available
+ * @debounce_interval:	debounce interval in msecs
+ * @poll_interval:	polling interval in msecs - for polling driver only
+ * @enable:		callback to enable the tilt switch
+ * @disable:		callback to disable the tilt switch
+ *
+ * This structure contains gpio-tilt-switch configuration
+ * information that must be passed by platform code to the
+ * gpio-tilt input driver.
+ */
+struct gpio_tilt_platform_data {
+	struct gpio *gpios;
+	int nr_gpios;
+
+	struct gpio_tilt_axis *axes;
+	int nr_axes;
+
+	struct gpio_tilt_state *states;
+	int nr_states;
+
+	int debounce_interval;
+
+	unsigned int poll_interval;
+	int (*enable)(struct device *dev);
+	void (*disable)(struct device *dev);
+};
+
+#endif
-- 
cgit v1.2.3


From 4585790d1cde32a5719c24412e9845e031358e08 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 30 Nov 2011 10:55:14 +0000
Subject: ASoC: Allow more WM8958/WM1811 button levels with default handler

The WM8958 and WM1811 support detecting a range of buttons. Allow the
user to provide platform data enabling more of these levels without
having to write a custom detection handler.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/mfd/wm8994/pdata.h |  3 +++
 sound/soc/codecs/wm8994.c        | 42 ++++++++++++++++++++++++++++++++--------
 sound/soc/codecs/wm8994.h        |  1 +
 3 files changed, 38 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index ea32f306dca6..195ade95af38 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -168,6 +168,9 @@ struct wm8994_pdata {
 	/* WM8958 microphone bias configuration */
 	int micbias[2];
 
+	/* WM8958 microphone detection ranges */
+	u16 micd_lvl_sel;
+
 	/* Disable the internal pull downs on the LDOs if they are
 	 * always driven (eg, connected to an always on supply or
 	 * GPIO that always drives an output.  If they float power
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 45bfa09f2e45..3e52d40866d2 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -3043,6 +3043,7 @@ static void wm8958_default_micdet(u16 status, void *data)
 {
 	struct snd_soc_codec *codec = data;
 	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
+	int report;
 
 	dev_dbg(codec->dev, "MICDET %x\n", status);
 
@@ -3055,7 +3056,7 @@ static void wm8958_default_micdet(u16 status, void *data)
 		wm8958_micd_set_rate(codec);
 
 		snd_soc_jack_report(wm8994->micdet[0].jack, 0,
-				    SND_JACK_BTN_0 | SND_JACK_HEADSET);
+				    wm8994->btn_mask | SND_JACK_HEADSET);
 
 		return;
 	}
@@ -3088,12 +3089,27 @@ static void wm8958_default_micdet(u16 status, void *data)
 
 	/* Report short circuit as a button */
 	if (wm8994->jack_mic) {
+		report = 0;
 		if (status & 0x4)
-			snd_soc_jack_report(wm8994->micdet[0].jack,
-					    SND_JACK_BTN_0, SND_JACK_BTN_0);
-		else
-			snd_soc_jack_report(wm8994->micdet[0].jack,
-					    0, SND_JACK_BTN_0);
+			report |= SND_JACK_BTN_0;
+
+		if (status & 0x8)
+			report |= SND_JACK_BTN_1;
+
+		if (status & 0x10)
+			report |= SND_JACK_BTN_2;
+
+		if (status & 0x20)
+			report |= SND_JACK_BTN_3;
+
+		if (status & 0x40)
+			report |= SND_JACK_BTN_4;
+
+		if (status & 0x80)
+			report |= SND_JACK_BTN_5;
+
+		snd_soc_jack_report(wm8994->micdet[0].jack, report,
+				    wm8994->btn_mask);
 	}
 }
 
@@ -3118,6 +3134,7 @@ int wm8958_mic_detect(struct snd_soc_codec *codec, struct snd_soc_jack *jack,
 {
 	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
 	struct wm8994 *control = wm8994->wm8994;
+	u16 micd_lvl_sel;
 
 	switch (control->type) {
 	case WM1811:
@@ -3145,9 +3162,18 @@ int wm8958_mic_detect(struct snd_soc_codec *codec, struct snd_soc_jack *jack,
 
 		wm8958_micd_set_rate(codec);
 
-		/* Detect microphones and short circuits */
+		/* Detect microphones and short circuits by default */
+		if (wm8994->pdata->micd_lvl_sel)
+			micd_lvl_sel = wm8994->pdata->micd_lvl_sel;
+		else
+			micd_lvl_sel = 0x41;
+
+		wm8994->btn_mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+			SND_JACK_BTN_2 | SND_JACK_BTN_3 |
+			SND_JACK_BTN_4 | SND_JACK_BTN_5;
+
 		snd_soc_update_bits(codec, WM8958_MIC_DETECT_2,
-				    WM8958_MICD_LVL_SEL_MASK, 0x41);
+				    WM8958_MICD_LVL_SEL_MASK, micd_lvl_sel);
 
 		snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
 				    WM8958_MICD_ENA, WM8958_MICD_ENA);
diff --git a/sound/soc/codecs/wm8994.h b/sound/soc/codecs/wm8994.h
index c3e71d72eb6a..77e3d8c9eeb8 100644
--- a/sound/soc/codecs/wm8994.h
+++ b/sound/soc/codecs/wm8994.h
@@ -129,6 +129,7 @@ struct wm8994_priv {
 	struct wm8994_micdet micdet[2];
 	bool detecting;
 	bool jack_mic;
+	int btn_mask;
 
 	wm8958_micdet_cb jack_cb;
 	void *jack_cb_data;
-- 
cgit v1.2.3


From af6b6fe41c4bc9e7933d66bbbf5106e0e7e6e484 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 30 Nov 2011 20:32:05 +0000
Subject: ASoC: Implement support for WM1811A jack detection

The WM1811A features an advanced low power accessory detection subsystem
which allows the device to be maintained in a very low power state while
the system is idle without sacrificing any accessory detection features.

Implement software support for this, automatically managing the power
configuration of the device depending on the detected accessory.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/mfd/wm8994/registers.h |  16 +++
 sound/soc/codecs/wm8994.c            | 264 ++++++++++++++++++++++++++++++++---
 sound/soc/codecs/wm8994.h            |   3 +
 3 files changed, 264 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/registers.h b/include/linux/mfd/wm8994/registers.h
index 83a9caec0e43..ebfc92fdcd77 100644
--- a/include/linux/mfd/wm8994/registers.h
+++ b/include/linux/mfd/wm8994/registers.h
@@ -242,6 +242,7 @@
 #define WM8994_GPIO_4                           0x703
 #define WM8994_GPIO_5                           0x704
 #define WM8994_GPIO_6                           0x705
+#define WM1811_JACKDET_CTRL			0x705
 #define WM8994_GPIO_7                           0x706
 #define WM8994_GPIO_8                           0x707
 #define WM8994_GPIO_9                           0x708
@@ -1852,6 +1853,9 @@
 /*
  * R57 (0x39) - AntiPOP (2)
  */
+#define WM1811_JACKDET_MODE_MASK                0x0180  /* JACKDET_MODE - [8:7] */
+#define WM1811_JACKDET_MODE_SHIFT                    7  /* JACKDET_MODE - [8:7] */
+#define WM1811_JACKDET_MODE_WIDTH                    2  /* JACKDET_MODE - [8:7] */
 #define WM8994_MICB2_DISCH                      0x0100  /* MICB2_DISCH */
 #define WM8994_MICB2_DISCH_MASK                 0x0100  /* MICB2_DISCH */
 #define WM8994_MICB2_DISCH_SHIFT                     8  /* MICB2_DISCH */
@@ -4186,6 +4190,18 @@
 #define WM8994_STL_SEL_SHIFT                         0  /* STL_SEL */
 #define WM8994_STL_SEL_WIDTH                         1  /* STL_SEL */
 
+/*
+ * R1797 (0x705) - JACKDET Ctrl
+ */
+#define WM1811_JACKDET_DB                       0x0100  /* JACKDET_DB */
+#define WM1811_JACKDET_DB_MASK                  0x0100  /* JACKDET_DB */
+#define WM1811_JACKDET_DB_SHIFT                      8  /* JACKDET_DB */
+#define WM1811_JACKDET_DB_WIDTH                      1  /* JACKDET_DB */
+#define WM1811_JACKDET_LVL                      0x0040  /* JACKDET_LVL */
+#define WM1811_JACKDET_LVL_MASK                 0x0040  /* JACKDET_LVL */
+#define WM1811_JACKDET_LVL_SHIFT                     6  /* JACKDET_LVL */
+#define WM1811_JACKDET_LVL_WIDTH                     1  /* JACKDET_LVL */
+
 /*
  * R1824 (0x720) - Pull Control (1)
  */
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index e65745bc1003..2e28f472b963 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -38,6 +38,11 @@
 #include "wm8994.h"
 #include "wm_hubs.h"
 
+#define WM1811_JACKDET_MODE_NONE  0x0000
+#define WM1811_JACKDET_MODE_JACK  0x0100
+#define WM1811_JACKDET_MODE_MIC   0x0080
+#define WM1811_JACKDET_MODE_AUDIO 0x0180
+
 #define WM8994_NUM_DRC 3
 #define WM8994_NUM_EQ  3
 
@@ -55,23 +60,34 @@ static int wm8994_retune_mobile_base[] = {
 
 static void wm8958_default_micdet(u16 status, void *data);
 
-static const struct {
+struct wm8958_micd_rate {
 	int sysclk;
 	bool idle;
 	int start;
 	int rate;
-} wm8958_micd_rates[] = {
+};
+
+static const struct wm8958_micd_rate micdet_rates[] = {
 	{ 32768,       true,  1, 4 },
 	{ 32768,       false, 1, 1 },
 	{ 44100 * 256, true,  7, 10 },
 	{ 44100 * 256, false, 7, 10 },
 };
 
+static const struct wm8958_micd_rate jackdet_rates[] = {
+	{ 32768,       true,  0, 1 },
+	{ 32768,       false, 0, 1 },
+	{ 44100 * 256, true,  7, 10 },
+	{ 44100 * 256, false, 7, 10 },
+};
+
 static void wm8958_micd_set_rate(struct snd_soc_codec *codec)
 {
 	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
 	int best, i, sysclk, val;
 	bool idle;
+	const struct wm8958_micd_rate *rates;
+	int num_rates;
 
 	if (wm8994->jack_cb != wm8958_default_micdet)
 		return;
@@ -84,19 +100,27 @@ static void wm8958_micd_set_rate(struct snd_soc_codec *codec)
 	else
 		sysclk = wm8994->aifclk[0];
 
+	if (wm8994->jackdet) {
+		rates = jackdet_rates;
+		num_rates = ARRAY_SIZE(jackdet_rates);
+	} else {
+		rates = micdet_rates;
+		num_rates = ARRAY_SIZE(micdet_rates);
+	}
+
 	best = 0;
-	for (i = 0; i < ARRAY_SIZE(wm8958_micd_rates); i++) {
-		if (wm8958_micd_rates[i].idle != idle)
+	for (i = 0; i < num_rates; i++) {
+		if (rates[i].idle != idle)
 			continue;
-		if (abs(wm8958_micd_rates[i].sysclk - sysclk) <
-		    abs(wm8958_micd_rates[best].sysclk - sysclk))
+		if (abs(rates[i].sysclk - sysclk) <
+		    abs(rates[best].sysclk - sysclk))
 			best = i;
-		else if (wm8958_micd_rates[best].idle != idle)
+		else if (rates[best].idle != idle)
 			best = i;
 	}
 
-	val = wm8958_micd_rates[best].start << WM8958_MICD_BIAS_STARTTIME_SHIFT
-		| wm8958_micd_rates[best].rate << WM8958_MICD_RATE_SHIFT;
+	val = rates[best].start << WM8958_MICD_BIAS_STARTTIME_SHIFT
+		| rates[best].rate << WM8958_MICD_RATE_SHIFT;
 
 	snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
 			    WM8958_MICD_BIAS_STARTTIME_MASK |
@@ -762,6 +786,74 @@ SOC_SINGLE_TLV("MIXINL IN1RP Boost Volume", WM8994_INPUT_MIXER_1, 8, 1, 0,
 	       mixin_boost_tlv),
 };
 
+/* We run all mode setting through a function to enforce audio mode */
+static void wm1811_jackdet_set_mode(struct snd_soc_codec *codec, u16 mode)
+{
+	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
+
+	if (wm8994->active_refcount)
+		mode = WM1811_JACKDET_MODE_AUDIO;
+
+	snd_soc_update_bits(codec, WM8994_ANTIPOP_2,
+			    WM1811_JACKDET_MODE_MASK, mode);
+
+	if (mode == WM1811_JACKDET_MODE_MIC)
+		msleep(2);
+}
+
+static void active_reference(struct snd_soc_codec *codec)
+{
+	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
+
+	mutex_lock(&wm8994->accdet_lock);
+
+	wm8994->active_refcount++;
+
+	dev_dbg(codec->dev, "Active refcount incremented, now %d\n",
+		wm8994->active_refcount);
+
+	if (wm8994->active_refcount == 1) {
+		/* If we're using jack detection go into audio mode */
+		if (wm8994->jackdet && wm8994->jack_cb) {
+			snd_soc_update_bits(codec, WM8994_ANTIPOP_2,
+					    WM1811_JACKDET_MODE_MASK,
+					    WM1811_JACKDET_MODE_AUDIO);
+			msleep(2);
+		}
+	}
+
+	mutex_unlock(&wm8994->accdet_lock);
+}
+
+static void active_dereference(struct snd_soc_codec *codec)
+{
+	struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
+	u16 mode;
+
+	mutex_lock(&wm8994->accdet_lock);
+
+	wm8994->active_refcount--;
+
+	dev_dbg(codec->dev, "Active refcount decremented, now %d\n",
+		wm8994->active_refcount);
+
+	if (wm8994->active_refcount == 0) {
+		/* Go into appropriate detection only mode */
+		if (wm8994->jackdet && wm8994->jack_cb) {
+			if (wm8994->jack_mic || wm8994->mic_detecting)
+				mode = WM1811_JACKDET_MODE_MIC;
+			else
+				mode = WM1811_JACKDET_MODE_JACK;
+
+			snd_soc_update_bits(codec, WM8994_ANTIPOP_2,
+					    WM1811_JACKDET_MODE_MASK,
+					    mode);
+		}
+	}
+
+	mutex_unlock(&wm8994->accdet_lock);
+}
+
 static int clk_sys_event(struct snd_soc_dapm_widget *w,
 			 struct snd_kcontrol *kcontrol, int event)
 {
@@ -1919,6 +2011,8 @@ static int _wm8994_set_fll(struct snd_soc_codec *codec, int id, int src,
 	if (freq_out) {
 		/* Enable VMID if we need it */
 		if (!was_enabled) {
+			active_reference(codec);
+
 			switch (control->type) {
 			case WM8994:
 				vmid_reference(codec);
@@ -1962,6 +2056,8 @@ static int _wm8994_set_fll(struct snd_soc_codec *codec, int id, int src,
 			default:
 				break;
 			}
+
+			active_dereference(codec);
 		}
 	}
 
@@ -2091,6 +2187,9 @@ static int wm8994_set_bias_level(struct snd_soc_codec *codec,
 		default:
 			break;
 		}
+
+		if (codec->dapm.bias_level == SND_SOC_BIAS_STANDBY)
+			active_reference(codec);
 		break;
 
 	case SND_SOC_BIAS_STANDBY:
@@ -2143,6 +2242,9 @@ static int wm8994_set_bias_level(struct snd_soc_codec *codec,
 					    WM8994_LINEOUT2_DISCH);
 		}
 
+		if (codec->dapm.bias_level == SND_SOC_BIAS_PREPARE)
+			active_dereference(codec);
+
 		/* MICBIAS into bypass mode on newer devices */
 		switch (control->type) {
 		case WM8958:
@@ -2168,6 +2270,7 @@ static int wm8994_set_bias_level(struct snd_soc_codec *codec,
 		break;
 	}
 	codec->dapm.bias_level = level;
+
 	return 0;
 }
 
@@ -2715,6 +2818,9 @@ static int wm8994_suspend(struct snd_soc_codec *codec, pm_message_t state)
 		snd_soc_update_bits(codec, WM8994_MICBIAS, WM8994_MICD_ENA, 0);
 		break;
 	case WM1811:
+		snd_soc_update_bits(codec, WM8994_ANTIPOP_2,
+				    WM1811_JACKDET_MODE_MASK, 0);
+		/* Fall through */
 	case WM8958:
 		snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
 				    WM8958_MICD_ENA, 0);
@@ -2784,6 +2890,13 @@ static int wm8994_resume(struct snd_soc_codec *codec)
 					    WM8994_MICD_ENA, WM8994_MICD_ENA);
 		break;
 	case WM1811:
+		if (wm8994->jackdet && wm8994->jack_cb) {
+			/* Restart from idle */
+			snd_soc_update_bits(codec, WM8994_ANTIPOP_2,
+					    WM1811_JACKDET_MODE_MASK,
+					    WM1811_JACKDET_MODE_JACK);
+			break;
+		}
 	case WM8958:
 		if (wm8994->jack_cb)
 			snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
@@ -3047,17 +3160,20 @@ static void wm8958_default_micdet(u16 status, void *data)
 
 	dev_dbg(codec->dev, "MICDET %x\n", status);
 
-	/* If nothing present then clear our statuses */
+	/* Either nothing present or just starting detection */
 	if (!(status & WM8958_MICD_STS)) {
-		dev_dbg(codec->dev, "Detected open circuit\n");
-		wm8994->jack_mic = false;
-		wm8994->mic_detecting = true;
+		if (!wm8994->jackdet) {
+			/* If nothing present then clear our statuses */
+			dev_dbg(codec->dev, "Detected open circuit\n");
+			wm8994->jack_mic = false;
+			wm8994->mic_detecting = true;
 
-		wm8958_micd_set_rate(codec);
-
-		snd_soc_jack_report(wm8994->micdet[0].jack, 0,
-				    wm8994->btn_mask | SND_JACK_HEADSET);
+			wm8958_micd_set_rate(codec);
 
+			snd_soc_jack_report(wm8994->micdet[0].jack, 0,
+					    wm8994->btn_mask |
+					     SND_JACK_HEADSET);
+		}
 		return;
 	}
 
@@ -3085,6 +3201,15 @@ static void wm8958_default_micdet(u16 status, void *data)
 
 		snd_soc_jack_report(wm8994->micdet[0].jack, SND_JACK_HEADPHONE,
 				    SND_JACK_HEADSET);
+
+		/* If we have jackdet that will detect removal */
+		if (wm8994->jackdet) {
+			snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
+					    WM8958_MICD_ENA, 0);
+
+			wm1811_jackdet_set_mode(codec,
+						WM1811_JACKDET_MODE_JACK);
+		}
 	}
 
 	/* Report short circuit as a button */
@@ -3113,6 +3238,56 @@ static void wm8958_default_micdet(u16 status, void *data)
 	}
 }
 
+static irqreturn_t wm1811_jackdet_irq(int irq, void *data)
+{
+	struct wm8994_priv *wm8994 = data;
+	struct snd_soc_codec *codec = wm8994->codec;
+	int reg;
+
+	mutex_lock(&wm8994->accdet_lock);
+
+	reg = snd_soc_read(codec, WM1811_JACKDET_CTRL);
+	if (reg < 0) {
+		dev_err(codec->dev, "Failed to read jack status: %d\n", reg);
+		mutex_unlock(&wm8994->accdet_lock);
+		return IRQ_NONE;
+	}
+
+	dev_dbg(codec->dev, "JACKDET %x\n", reg);
+
+	if (reg & WM1811_JACKDET_LVL) {
+		dev_dbg(codec->dev, "Jack detected\n");
+
+		snd_soc_jack_report(wm8994->micdet[0].jack,
+				    SND_JACK_MECHANICAL, SND_JACK_MECHANICAL);
+
+		/*
+		 * Start off measurement of microphone impedance to find
+		 * out what's actually there.
+		 */
+		wm8994->mic_detecting = true;
+		wm1811_jackdet_set_mode(codec, WM1811_JACKDET_MODE_MIC);
+		snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
+				    WM8958_MICD_ENA, WM8958_MICD_ENA);
+	} else {
+		dev_dbg(codec->dev, "Jack not detected\n");
+
+		snd_soc_jack_report(wm8994->micdet[0].jack, 0,
+				    SND_JACK_MECHANICAL | SND_JACK_HEADSET |
+				    wm8994->btn_mask);
+
+		wm8994->mic_detecting = false;
+		wm8994->jack_mic = false;
+		snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
+				    WM8958_MICD_ENA, 0);
+		wm1811_jackdet_set_mode(codec, WM1811_JACKDET_MODE_JACK);
+	}
+
+	mutex_unlock(&wm8994->accdet_lock);
+
+	return IRQ_HANDLED;
+}
+
 /**
  * wm8958_mic_detect - Enable microphone detection via the WM8958 IRQ
  *
@@ -3175,8 +3350,22 @@ int wm8958_mic_detect(struct snd_soc_codec *codec, struct snd_soc_jack *jack,
 		snd_soc_update_bits(codec, WM8958_MIC_DETECT_2,
 				    WM8958_MICD_LVL_SEL_MASK, micd_lvl_sel);
 
-		snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
-				    WM8958_MICD_ENA, WM8958_MICD_ENA);
+		WARN_ON(codec->dapm.bias_level > SND_SOC_BIAS_STANDBY);
+
+		/*
+		 * If we can use jack detection start off with that,
+		 * otherwise jump straight to microphone detection.
+		 */
+		if (wm8994->jackdet) {
+			snd_soc_update_bits(codec, WM8994_LDO_1,
+					    WM8994_LDO1_DISCH, 0);
+			wm1811_jackdet_set_mode(codec,
+						WM1811_JACKDET_MODE_JACK);
+		} else {
+			snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
+					    WM8958_MICD_ENA, WM8958_MICD_ENA);
+		}
+
 	} else {
 		snd_soc_update_bits(codec, WM8958_MIC_DETECT_1,
 				    WM8958_MICD_ENA, 0);
@@ -3193,6 +3382,18 @@ static irqreturn_t wm8958_mic_irq(int irq, void *data)
 	struct snd_soc_codec *codec = wm8994->codec;
 	int reg, count;
 
+	mutex_lock(&wm8994->accdet_lock);
+
+	/*
+	 * Jack detection may have detected a removal simultaneously
+	 * with an update of the MICDET status; if so it will have
+	 * stopped detection and we can ignore this interrupt.
+	 */
+	if (!(snd_soc_read(codec, WM8958_MIC_DETECT_1) & WM8958_MICD_ENA)) {
+		mutex_unlock(&wm8994->accdet_lock);
+		return IRQ_HANDLED;
+	}
+
 	/* We may occasionally read a detection without an impedence
 	 * range being provided - if that happens loop again.
 	 */
@@ -3200,6 +3401,7 @@ static irqreturn_t wm8958_mic_irq(int irq, void *data)
 	do {
 		reg = snd_soc_read(codec, WM8958_MIC_DETECT_3);
 		if (reg < 0) {
+			mutex_unlock(&wm8994->accdet_lock);
 			dev_err(codec->dev,
 				"Failed to read mic detect status: %d\n",
 				reg);
@@ -3230,6 +3432,8 @@ static irqreturn_t wm8958_mic_irq(int irq, void *data)
 		dev_warn(codec->dev, "Accessory detection with no callback\n");
 
 out:
+	mutex_unlock(&wm8994->accdet_lock);
+
 	return IRQ_HANDLED;
 }
 
@@ -3280,6 +3484,8 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
 	wm8994->pdata = dev_get_platdata(codec->dev->parent);
 	wm8994->codec = codec;
 
+	mutex_init(&wm8994->accdet_lock);
+
 	for (i = 0; i < ARRAY_SIZE(wm8994->fll_locked); i++)
 		init_completion(&wm8994->fll_locked[i]);
 
@@ -3428,6 +3634,21 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
 		}
 	}
 
+	switch (control->type) {
+	case WM1811:
+		if (wm8994->revision > 1) {
+			ret = wm8994_request_irq(wm8994->wm8994,
+						 WM8994_IRQ_GPIO(6),
+						 wm1811_jackdet_irq, "JACKDET",
+						 wm8994);
+			if (ret == 0)
+				wm8994->jackdet = true;
+		}
+		break;
+	default:
+		break;
+	}
+
 	wm8994->fll_locked_irq = true;
 	for (i = 0; i < ARRAY_SIZE(wm8994->fll_locked); i++) {
 		ret = wm8994_request_irq(wm8994->wm8994,
@@ -3650,6 +3871,8 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
 	return 0;
 
 err_irq:
+	if (wm8994->jackdet)
+		wm8994_free_irq(wm8994->wm8994, WM8994_IRQ_GPIO(6), wm8994);
 	wm8994_free_irq(wm8994->wm8994, WM8994_IRQ_MIC2_SHRT, wm8994);
 	wm8994_free_irq(wm8994->wm8994, WM8994_IRQ_MIC2_DET, wm8994);
 	wm8994_free_irq(wm8994->wm8994, WM8994_IRQ_MIC1_SHRT, wm8994);
@@ -3688,6 +3911,9 @@ static int  wm8994_codec_remove(struct snd_soc_codec *codec)
 	wm8994_free_irq(wm8994->wm8994, WM8994_IRQ_TEMP_SHUT, codec);
 	wm8994_free_irq(wm8994->wm8994, WM8994_IRQ_TEMP_WARN, codec);
 
+	if (wm8994->jackdet)
+		wm8994_free_irq(wm8994->wm8994, WM8994_IRQ_GPIO(6), wm8994);
+
 	switch (control->type) {
 	case WM8994:
 		if (wm8994->micdet_irq)
diff --git a/sound/soc/codecs/wm8994.h b/sound/soc/codecs/wm8994.h
index 8622bc4db2fe..6ef3f11878c6 100644
--- a/sound/soc/codecs/wm8994.h
+++ b/sound/soc/codecs/wm8994.h
@@ -85,6 +85,7 @@ struct wm8994_priv {
 	bool fll_locked_irq;
 
 	int vmid_refcount;
+	int active_refcount;
 
 	int dac_rates[2];
 	int lrclk_shared[2];
@@ -126,10 +127,12 @@ struct wm8994_priv {
 	const char **enh_eq_texts;
 	struct soc_enum enh_eq_enum;
 
+	struct mutex accdet_lock;
 	struct wm8994_micdet micdet[2];
 	bool mic_detecting;
 	bool jack_mic;
 	int btn_mask;
+	bool jackdet;
 
 	wm8958_micdet_cb jack_cb;
 	void *jack_cb_data;
-- 
cgit v1.2.3


From cd1707a99a2cb43cd8ab0c1952b455b218f15884 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 1 Dec 2011 13:44:25 +0000
Subject: ASoC: Add platform data for WM8958/WM1811 microphone detection rates

Allow systems to override the default microphone detection rates using
platform data in case the settings are not suitable (eg, due to an
unusually noisy jack).

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/mfd/wm8994/pdata.h | 20 ++++++++++++++++++++
 sound/soc/codecs/wm8994.c        | 12 ++++--------
 2 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index 195ade95af38..5256f1f41d7f 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -113,6 +113,23 @@ struct wm8958_enh_eq_cfg {
 	u16 regs[WM8958_ENH_EQ_REGS];
 };
 
+/**
+ * Microphone detection rates, used to tune response rates and power
+ * consumption for WM8958/WM1811 microphone detection.
+ *
+ * @sysclk: System clock rate to use this configuration for.
+ * @idle: True if this configuration should be used when no accessory is
+ *        detected, false otherwise.
+ * @start: Value for MICD_BIAS_START_TIME register field (not shifted).
+ * @rate: Value for MICD_RATE register field (not shifted).
+ */
+struct wm8958_micd_rate {
+	int sysclk;
+	bool idle;
+	int start;
+	int rate;
+};
+
 struct wm8994_pdata {
 	int gpio_base;
 
@@ -144,6 +161,9 @@ struct wm8994_pdata {
 	int num_enh_eq_cfgs;
 	struct wm8958_enh_eq_cfg *enh_eq_cfgs;
 
+	int num_micd_rates;
+	struct wm8958_micd_rate *micd_rates;
+
         /* LINEOUT can be differential or single ended */
         unsigned int lineout1_diff:1;
         unsigned int lineout2_diff:1;
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 91f3638ab33f..6bdf8137c7e8 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -60,13 +60,6 @@ static int wm8994_retune_mobile_base[] = {
 
 static void wm8958_default_micdet(u16 status, void *data);
 
-struct wm8958_micd_rate {
-	int sysclk;
-	bool idle;
-	int start;
-	int rate;
-};
-
 static const struct wm8958_micd_rate micdet_rates[] = {
 	{ 32768,       true,  1, 4 },
 	{ 32768,       false, 1, 1 },
@@ -100,7 +93,10 @@ static void wm8958_micd_set_rate(struct snd_soc_codec *codec)
 	else
 		sysclk = wm8994->aifclk[0];
 
-	if (wm8994->jackdet) {
+	if (wm8994->pdata && wm8994->pdata->micd_rates) {
+		rates = wm8994->pdata->micd_rates;
+		num_rates = wm8994->pdata->num_micd_rates;
+	} else if (wm8994->jackdet) {
 		rates = jackdet_rates;
 		num_rates = ARRAY_SIZE(jackdet_rates);
 	} else {
-- 
cgit v1.2.3


From faf02f8fee5563ea7f950b3f5f08c654aa6c4525 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 2 Dec 2011 17:44:50 +0900
Subject: serial: sh-sci: per-port modem control.

The bulk of the ports do not support any sort of modem control, so
blindly twiddling the MCE bit doesn't accomplish much. We now require
ports to manually specify which line supports modem control signals.

While at it, tidy up the RTS/CTSIO handling in SCSPTR parts so it's a bit
more obvious what's going on (and without clobbering other configurations
in the process).

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/tty/serial/sh-sci.c | 31 ++++++++++++++++++++-----------
 include/linux/serial_sci.h  | 10 ++++++++++
 2 files changed, 30 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 46deaaec836d..fd60d72eac89 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -474,8 +474,15 @@ static void sci_init_pins(struct uart_port *port, unsigned int cflag)
 	if (!reg->size)
 		return;
 
-	if (!(cflag & CRTSCTS))
-		sci_out(port, SCSPTR, 0x0080); /* Set RTS = 1 */
+	if ((s->cfg->capabilities & SCIx_HAVE_RTSCTS) &&
+	    ((!(cflag & CRTSCTS)))) {
+		unsigned short status;
+
+		status = sci_in(port, SCSPTR);
+		status &= ~SCSPTR_CTSIO;
+		status |= SCSPTR_RTSIO;
+		sci_out(port, SCSPTR, status); /* Set RTS = 1 */
+	}
 }
 
 static int sci_txfill(struct uart_port *port)
@@ -1764,16 +1771,18 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios,
 
 	sci_init_pins(port, termios->c_cflag);
 
-	reg = sci_getreg(port, SCFCR);
-	if (reg->size) {
-		unsigned short ctrl;
+	if (s->cfg->capabilities & SCIx_HAVE_RTSCTS) {
+		reg = sci_getreg(port, SCFCR);
+		if (reg->size) {
+			unsigned short ctrl;
 
-		ctrl = sci_in(port, SCFCR);
-		if (termios->c_cflag & CRTSCTS)
-			ctrl |= SCFCR_MCE;
-		else
-			ctrl &= ~SCFCR_MCE;
-		sci_out(port, SCFCR, ctrl);
+			ctrl = sci_in(port, SCFCR);
+			if (termios->c_cflag & CRTSCTS)
+				ctrl |= SCFCR_MCE;
+			else
+				ctrl &= ~SCFCR_MCE;
+			sci_out(port, SCFCR, ctrl);
+		}
 	}
 
 	sci_out(port, SCSCR, s->cfg->scscr);
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 369273a52679..15b1bdcaa9f5 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -49,6 +49,10 @@ enum {
 
 #define SCIF_DEFAULT_ERROR_MASK (SCIF_PER | SCIF_FER | SCIF_ER | SCIF_BRK)
 
+/* SCSPTR, optional */
+#define SCSPTR_RTSIO	(1 << 7)
+#define SCSPTR_CTSIO	(1 << 5)
+
 /* Offsets into the sci_port->irqs array */
 enum {
 	SCIx_ERI_IRQ,
@@ -108,6 +112,11 @@ struct plat_sci_port_ops {
 	void (*init_pins)(struct uart_port *, unsigned int cflag);
 };
 
+/*
+ * Port-specific capabilities
+ */
+#define SCIx_HAVE_RTSCTS	(1 << 0)
+
 /*
  * Platform device specific platform_data struct
  */
@@ -116,6 +125,7 @@ struct plat_sci_port {
 	unsigned int	irqs[SCIx_NR_IRQS];	/* ERI, RXI, TXI, BRI */
 	unsigned int	type;			/* SCI / SCIF / IRDA */
 	upf_t		flags;			/* UPF_* flags */
+	unsigned long	capabilities;		/* Port features/capabilities */
 
 	unsigned int	scbrr_algo_id;		/* SCBRR calculation algo */
 	unsigned int	scscr;			/* SCSCR initialization */
-- 
cgit v1.2.3


From 50f0959ad4f9ac1c5ee208bb820de299a1b3730b Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Fri, 2 Dec 2011 20:09:48 +0900
Subject: serial: sh-sci: Handle GPIO function requests.

This adds initial support for requesting the various GPIO functions
necessary for certain ports. This just plugs in dumb request/free logic,
but serves as a building block for migrating off of the ->init_pins mess
to a wholly gpiolib backed solution (primarily parts with external
RTS/CTS pins, but will also allow us to clean up RXD pin testing).

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/tty/serial/sh-sci.c | 71 +++++++++++++++++++++++++++++++++++++++++++--
 include/linux/serial_sci.h  | 12 ++++++++
 2 files changed, 81 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 761a800cb483..9e62349b3d9f 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -50,6 +50,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
+#include <linux/gpio.h>
 
 #ifdef CONFIG_SUPERH
 #include <asm/sh_bios.h>
@@ -73,6 +74,7 @@ struct sci_port {
 	struct clk		*fclk;
 
 	char			*irqstr[SCIx_NR_IRQS];
+	char			*gpiostr[SCIx_NR_FNS];
 
 	struct dma_chan			*chan_tx;
 	struct dma_chan			*chan_rx;
@@ -1105,6 +1107,67 @@ static void sci_free_irq(struct sci_port *port)
 	}
 }
 
+static const char *sci_gpio_names[SCIx_NR_FNS] = {
+	"sck", "rxd", "txd", "cts", "rts",
+};
+
+static const char *sci_gpio_str(unsigned int index)
+{
+	return sci_gpio_names[index];
+}
+
+static void __devinit sci_init_gpios(struct sci_port *port)
+{
+	struct uart_port *up = &port->port;
+	int i;
+
+	if (!port->cfg)
+		return;
+
+	for (i = 0; i < SCIx_NR_FNS; i++) {
+		const char *desc;
+		int ret;
+
+		if (!port->cfg->gpios[i])
+			continue;
+
+		desc = sci_gpio_str(i);
+
+		port->gpiostr[i] = kasprintf(GFP_KERNEL, "%s:%s",
+					     dev_name(up->dev), desc);
+
+		/*
+		 * If we've failed the allocation, we can still continue
+		 * on with a NULL string.
+		 */
+		if (!port->gpiostr[i])
+			dev_notice(up->dev, "%s string allocation failure\n",
+				   desc);
+
+		ret = gpio_request(port->cfg->gpios[i], port->gpiostr[i]);
+		if (unlikely(ret != 0)) {
+			dev_notice(up->dev, "failed %s gpio request\n", desc);
+
+			/*
+			 * If we can't get the GPIO for whatever reason,
+			 * no point in keeping the verbose string around.
+			 */
+			kfree(port->gpiostr[i]);
+		}
+	}
+}
+
+static void sci_free_gpios(struct sci_port *port)
+{
+	int i;
+
+	for (i = 0; i < SCIx_NR_FNS; i++)
+		if (port->cfg->gpios[i]) {
+			gpio_free(port->cfg->gpios[i]);
+			kfree(port->gpiostr[i]);
+		}
+}
+
 static unsigned int sci_tx_empty(struct uart_port *port)
 {
 	unsigned short status = sci_in(port, SCxSR);
@@ -1962,6 +2025,8 @@ static int __devinit sci_init_single(struct platform_device *dev,
 	struct uart_port *port = &sci_port->port;
 	int ret;
 
+	sci_port->cfg	= p;
+
 	port->ops	= &sci_uart_ops;
 	port->iotype	= UPIO_MEM;
 	port->line	= index;
@@ -2007,6 +2072,8 @@ static int __devinit sci_init_single(struct platform_device *dev,
 
 		port->dev = &dev->dev;
 
+		sci_init_gpios(sci_port);
+
 		pm_runtime_irq_safe(&dev->dev);
 		pm_runtime_enable(&dev->dev);
 	}
@@ -2041,8 +2108,6 @@ static int __devinit sci_init_single(struct platform_device *dev,
 		p->error_mask |= (1 << p->overrun_bit);
 	}
 
-	sci_port->cfg		= p;
-
 	port->mapbase		= p->mapbase;
 	port->type		= p->type;
 	port->flags		= p->flags;
@@ -2249,6 +2314,8 @@ static int sci_remove(struct platform_device *dev)
 	cpufreq_unregister_notifier(&port->freq_transition,
 				    CPUFREQ_TRANSITION_NOTIFIER);
 
+	sci_free_gpios(port);
+
 	uart_remove_one_port(&sci_uart_driver, &port->port);
 
 	clk_put(port->iclk);
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 15b1bdcaa9f5..78779074f6e8 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -64,6 +64,17 @@ enum {
 	SCIx_MUX_IRQ = SCIx_NR_IRQS,	/* special case */
 };
 
+/* Offsets into the sci_port->gpios array */
+enum {
+	SCIx_SCK,
+	SCIx_RXD,
+	SCIx_TXD,
+	SCIx_CTS,
+	SCIx_RTS,
+
+	SCIx_NR_FNS,
+};
+
 enum {
 	SCIx_PROBE_REGTYPE,
 
@@ -123,6 +134,7 @@ struct plat_sci_port_ops {
 struct plat_sci_port {
 	unsigned long	mapbase;		/* resource base */
 	unsigned int	irqs[SCIx_NR_IRQS];	/* ERI, RXI, TXI, BRI */
+	unsigned int	gpios[SCIx_NR_FNS];	/* SCK, RXD, TXD, CTS, RTS */
 	unsigned int	type;			/* SCI / SCIF / IRDA */
 	upf_t		flags;			/* UPF_* flags */
 	unsigned long	capabilities;		/* Port features/capabilities */
-- 
cgit v1.2.3


From f528f0b8e53d73b18be71e96693cfab9322f33c7 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 26 Sep 2011 17:12:53 +0100
Subject: kmemleak: Handle percpu memory allocation

This patch adds kmemleak callbacks from the percpu allocator, reducing a
number of false positives caused by kmemleak not scanning such memory
blocks. The percpu chunks are never reported as leaks because of current
kmemleak limitations with the __percpu pointer not pointing directly to
the actual chunks.

Reported-by: Huajun Li <huajun.li.lee@gmail.com>
Acked-by: Christoph Lameter <cl@gentwo.org>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/kmemleak.txt |  3 ++
 include/linux/kmemleak.h   |  8 ++++++
 mm/kmemleak.c              | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/percpu.c                | 12 +++++++-
 4 files changed, 94 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
index 51063e681ca4..b6e39739a36d 100644
--- a/Documentation/kmemleak.txt
+++ b/Documentation/kmemleak.txt
@@ -127,7 +127,10 @@ See the include/linux/kmemleak.h header for the functions prototype.
 
 kmemleak_init		 - initialize kmemleak
 kmemleak_alloc		 - notify of a memory block allocation
+kmemleak_alloc_percpu	 - notify of a percpu memory block allocation
 kmemleak_free		 - notify of a memory block freeing
+kmemleak_free_part	 - notify of a partial memory block freeing
+kmemleak_free_percpu	 - notify of a percpu memory block freeing
 kmemleak_not_leak	 - mark an object as not a leak
 kmemleak_ignore		 - do not scan or report an object as leak
 kmemleak_scan_area	 - add scan areas inside a memory block
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 99d9a6766f7e..2a5e5548a1d2 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -26,8 +26,10 @@
 extern void kmemleak_init(void) __ref;
 extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
 			   gfp_t gfp) __ref;
+extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) __ref;
 extern void kmemleak_free(const void *ptr) __ref;
 extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
+extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
 extern void kmemleak_padding(const void *ptr, unsigned long offset,
 			     size_t size) __ref;
 extern void kmemleak_not_leak(const void *ptr) __ref;
@@ -68,6 +70,9 @@ static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
 					    gfp_t gfp)
 {
 }
+static inline void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size)
+{
+}
 static inline void kmemleak_free(const void *ptr)
 {
 }
@@ -77,6 +82,9 @@ static inline void kmemleak_free_part(const void *ptr, size_t size)
 static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
 {
 }
+static inline void kmemleak_free_percpu(const void __percpu *ptr)
+{
+}
 static inline void kmemleak_not_leak(const void *ptr)
 {
 }
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index b4f4e6021c1b..15c50302ff93 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -230,8 +230,10 @@ static int kmemleak_skip_disable;
 /* kmemleak operation type for early logging */
 enum {
 	KMEMLEAK_ALLOC,
+	KMEMLEAK_ALLOC_PERCPU,
 	KMEMLEAK_FREE,
 	KMEMLEAK_FREE_PART,
+	KMEMLEAK_FREE_PERCPU,
 	KMEMLEAK_NOT_LEAK,
 	KMEMLEAK_IGNORE,
 	KMEMLEAK_SCAN_AREA,
@@ -852,6 +854,20 @@ out:
 	rcu_read_unlock();
 }
 
+/*
+ * Run early_alloc() on each possible CPU's copy of an early logged percpu block.
+ */
+static void early_alloc_percpu(struct early_log *log)
+{
+	unsigned int cpu;
+	const void __percpu *ptr = log->ptr;
+
+	for_each_possible_cpu(cpu) {
+		log->ptr = per_cpu_ptr(ptr, cpu);
+		early_alloc(log);
+	}
+}
+
 /**
  * kmemleak_alloc - register a newly allocated object
  * @ptr:	pointer to beginning of the object
@@ -878,6 +894,34 @@ void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
 }
 EXPORT_SYMBOL_GPL(kmemleak_alloc);
 
+/**
+ * kmemleak_alloc_percpu - register a newly allocated __percpu object
+ * @ptr:	__percpu pointer to beginning of the object
+ * @size:	size of the object
+ *
+ * This function is called from the kernel percpu allocator when a new object
+ * (memory block) is allocated (alloc_percpu). It assumes GFP_KERNEL
+ * allocation.
+ */
+void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size)
+{
+	unsigned int cpu;
+
+	pr_debug("%s(0x%p, %zu)\n", __func__, ptr, size);
+
+	/*
+	 * Percpu allocations are only scanned and not reported as leaks
+	 * (min_count is set to 0).
+	 */
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		for_each_possible_cpu(cpu)
+			create_object((unsigned long)per_cpu_ptr(ptr, cpu),
+				      size, 0, GFP_KERNEL);
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_ALLOC_PERCPU, ptr, size, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);
+
 /**
  * kmemleak_free - unregister a previously registered object
  * @ptr:	pointer to beginning of the object
@@ -916,6 +960,28 @@ void __ref kmemleak_free_part(const void *ptr, size_t size)
 }
 EXPORT_SYMBOL_GPL(kmemleak_free_part);
 
+/**
+ * kmemleak_free_percpu - unregister a previously registered __percpu object
+ * @ptr:	__percpu pointer to beginning of the object
+ *
+ * This function is called from the kernel percpu allocator when an object
+ * (memory block) is freed (free_percpu).
+ */
+void __ref kmemleak_free_percpu(const void __percpu *ptr)
+{
+	unsigned int cpu;
+
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr))
+		for_each_possible_cpu(cpu)
+			delete_object_full((unsigned long)per_cpu_ptr(ptr,
+								      cpu));
+	else if (atomic_read(&kmemleak_early_log))
+		log_early(KMEMLEAK_FREE_PERCPU, ptr, 0, 0);
+}
+EXPORT_SYMBOL_GPL(kmemleak_free_percpu);
+
 /**
  * kmemleak_not_leak - mark an allocated object as false positive
  * @ptr:	pointer to beginning of the object
@@ -1727,12 +1793,18 @@ void __init kmemleak_init(void)
 		case KMEMLEAK_ALLOC:
 			early_alloc(log);
 			break;
+		case KMEMLEAK_ALLOC_PERCPU:
+			early_alloc_percpu(log);
+			break;
 		case KMEMLEAK_FREE:
 			kmemleak_free(log->ptr);
 			break;
 		case KMEMLEAK_FREE_PART:
 			kmemleak_free_part(log->ptr, log->size);
 			break;
+		case KMEMLEAK_FREE_PERCPU:
+			kmemleak_free_percpu(log->ptr);
+			break;
 		case KMEMLEAK_NOT_LEAK:
 			kmemleak_not_leak(log->ptr);
 			break;
diff --git a/mm/percpu.c b/mm/percpu.c
index 3bb810a72006..86c5bdbdc370 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -67,6 +67,7 @@
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
+#include <linux/kmemleak.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -710,6 +711,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
 	const char *err;
 	int slot, off, new_alloc;
 	unsigned long flags;
+	void __percpu *ptr;
 
 	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
 		WARN(true, "illegal size (%zu) or align (%zu) for "
@@ -802,7 +804,9 @@ area_found:
 	mutex_unlock(&pcpu_alloc_mutex);
 
 	/* return address relative to base address */
-	return __addr_to_pcpu_ptr(chunk->base_addr + off);
+	ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
+	kmemleak_alloc_percpu(ptr, size);
+	return ptr;
 
 fail_unlock:
 	spin_unlock_irqrestore(&pcpu_lock, flags);
@@ -916,6 +920,8 @@ void free_percpu(void __percpu *ptr)
 	if (!ptr)
 		return;
 
+	kmemleak_free_percpu(ptr);
+
 	addr = __pcpu_ptr_to_addr(ptr);
 
 	spin_lock_irqsave(&pcpu_lock, flags);
@@ -1637,6 +1643,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 			rc = -ENOMEM;
 			goto out_free_areas;
 		}
+		/* kmemleak tracks the percpu allocations separately */
+		kmemleak_free(ptr);
 		areas[group] = ptr;
 
 		base = min(ptr, base);
@@ -1751,6 +1759,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 					   "for cpu%u\n", psize_str, cpu);
 				goto enomem;
 			}
+			/* kmemleak tracks the percpu allocations separately */
+			kmemleak_free(ptr);
 			pages[j++] = virt_to_page(ptr);
 		}
 
-- 
cgit v1.2.3


From 55af77969fbd7a841838220ea2287432e0da8ae5 Mon Sep 17 00:00:00 2001
From: Mitsuo Hayasaka <mitsuo.hayasaka.hu@hitachi.com>
Date: Tue, 29 Nov 2011 15:08:36 +0900
Subject: x86: Panic on detection of stack overflow

Currently, only a message is printed when a stack overflow is
detected, which is not sufficient for systems that need high
reliability. In general the overflow may corrupt data, and
additional corruption may occur from reading that data unless
the system stops.

This patch adds the sysctl parameter
kernel.panic_on_stackoverflow. When it is set, the kernel panics
on detecting an overflow of the kernel, IRQ or exception stacks
(user stacks are excluded). It is disabled by default.

Signed-off-by: Mitsuo Hayasaka <mitsuo.hayasaka.hu@hitachi.com>
Cc: yrl.pp-manager.tt@hitachi.com
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: http://lkml.kernel.org/r/20111129060836.11076.12323.stgit@ltc219.sdl.hitachi.co.jp
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 Documentation/sysctl/kernel.txt | 14 ++++++++++++++
 arch/x86/kernel/irq_32.c        |  2 ++
 arch/x86/kernel/irq_64.c        |  5 +++++
 include/linux/kernel.h          |  1 +
 kernel/sysctl.c                 |  9 +++++++++
 5 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 1f2463671a1a..6d8cd8b2c30d 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -49,6 +49,7 @@ show up in /proc/sys/kernel:
 - panic
 - panic_on_oops
 - panic_on_unrecovered_nmi
+- panic_on_stackoverflow
 - pid_max
 - powersave-nap               [ PPC only ]
 - printk
@@ -393,6 +394,19 @@ Controls the kernel's behaviour when an oops or BUG is encountered.
 
 ==============================================================
 
+panic_on_stackoverflow:
+
+Controls the kernel's behavior when an overflow of the kernel,
+IRQ or exception stacks is detected (user stacks are excluded).
+This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
+
+0: try to continue operation.
+
+1: panic immediately.
+
+==============================================================
+
+
 pid_max:
 
 PID allocation wrap value.  When the kernel's next PID value
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 72090705a656..e16e99ebd7ad 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -43,6 +43,8 @@ static void print_stack_overflow(void)
 {
 	printk(KERN_WARNING "low stack detected by irq handler\n");
 	dump_stack();
+	if (sysctl_panic_on_stackoverflow)
+		panic("low stack detected by irq handler - check messages\n");
 }
 
 #else
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 928a7e909619..42552b0dce6a 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -26,6 +26,8 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
 DEFINE_PER_CPU(struct pt_regs *, irq_regs);
 EXPORT_PER_CPU_SYMBOL(irq_regs);
 
+int sysctl_panic_on_stackoverflow;
+
 /*
  * Probabilistic stack overflow check:
  *
@@ -65,6 +67,9 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 		current->comm, curbase, regs->sp,
 		irq_stack_top, irq_stack_bottom,
 		estack_top, estack_bottom);
+
+	if (sysctl_panic_on_stackoverflow)
+		panic("low stack detected by irq handler - check messages\n");
 #endif
 }
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e8b1597b5cf2..ff83683c0b9d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -341,6 +341,7 @@ extern int panic_timeout;
 extern int panic_on_oops;
 extern int panic_on_unrecovered_nmi;
 extern int panic_on_io_nmi;
+extern int sysctl_panic_on_stackoverflow;
 extern const char *print_tainted(void);
 extern void add_taint(unsigned flag);
 extern int test_taint(unsigned flag);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ae2719643854..f487f257e05e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -803,6 +803,15 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	{
+		.procname	= "panic_on_stackoverflow",
+		.data		= &sysctl_panic_on_stackoverflow,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
 	{
 		.procname	= "bootloader_type",
 		.data		= &bootloader_type,
-- 
cgit v1.2.3


From b50cac55bf859d5b2fdcc1803a553a251b703456 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Thu, 6 Oct 2011 14:08:18 -0400
Subject: PCI/sysfs: add per pci device msi[x] irq listing (v5)

This patch adds a per-pci-device subdirectory in sysfs called:
/sys/bus/pci/devices/<device>/msi_irqs

This sub-directory exports the set of msi vectors allocated by a given
pci device, by creating a numbered sub-directory for each vector beneath
msi_irqs.  For each vector various attributes can be exported.
Currently the only attribute is called mode, which tracks the
operational mode of that vector (msi vs. msix).

Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/ABI/testing/sysfs-bus-pci |  18 ++++++
 drivers/pci/msi.c                       | 111 ++++++++++++++++++++++++++++++++
 include/linux/msi.h                     |   3 +
 include/linux/pci.h                     |   1 +
 4 files changed, 133 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 349ecf26ce10..34f51100f029 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -66,6 +66,24 @@ Description:
 		re-discover previously removed devices.
 		Depends on CONFIG_HOTPLUG.
 
+What:		/sys/bus/pci/devices/.../msi_irqs/
+Date:		September 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		The /sys/devices/.../msi_irqs directory contains a variable set
+		of sub-directories, with each sub-directory being named after a
+		corresponding msi irq vector allocated to that device.  Each
+		numbered sub-directory N contains attributes of that irq.
+		Note that this directory is not created for device drivers which
+		do not support msi irqs.
+
+What:		/sys/bus/pci/devices/.../msi_irqs/<N>/mode
+Date:		September 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		This attribute indicates the mode that the irq vector named by
+		the parent directory is in (msi vs. msix).
+
 What:		/sys/bus/pci/devices/.../remove
 Date:		January 2009
 Contact:	Linux PCI developers <linux-pci@vger.kernel.org>
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 0e6d04d7ba4f..e6b6b9c67023 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -323,6 +323,8 @@ static void free_msi_irqs(struct pci_dev *dev)
 			if (list_is_last(&entry->list, &dev->msi_list))
 				iounmap(entry->mask_base);
 		}
+		kobject_del(&entry->kobj);
+		kobject_put(&entry->kobj);
 		list_del(&entry->list);
 		kfree(entry);
 	}
@@ -403,6 +405,98 @@ void pci_restore_msi_state(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_restore_msi_state);
 
+
+#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
+#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)
+
+struct msi_attribute {
+	struct attribute        attr;
+	ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
+			 const char *buf, size_t count);
+};
+
+static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
+			     char *buf)
+{
+	return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
+}
+
+static ssize_t msi_irq_attr_show(struct kobject *kobj,
+				 struct attribute *attr, char *buf)
+{
+	struct msi_attribute *attribute = to_msi_attr(attr);
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(entry, attribute, buf);
+}
+
+static const struct sysfs_ops msi_irq_sysfs_ops = {
+	.show = msi_irq_attr_show,
+};
+
+static struct msi_attribute mode_attribute =
+	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);
+
+
+struct attribute *msi_irq_default_attrs[] = {
+	&mode_attribute.attr,
+	NULL
+};
+
+void msi_kobj_release(struct kobject *kobj)
+{
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	pci_dev_put(entry->dev);
+}
+
+static struct kobj_type msi_irq_ktype = {
+	.release = msi_kobj_release,
+	.sysfs_ops = &msi_irq_sysfs_ops,
+	.default_attrs = msi_irq_default_attrs,
+};
+
+static int populate_msi_sysfs(struct pci_dev *pdev)
+{
+	struct msi_desc *entry;
+	struct kobject *kobj;
+	int ret;
+	int count = 0;
+
+	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
+	if (!pdev->msi_kset)
+		return -ENOMEM;
+
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		kobj = &entry->kobj;
+		kobj->kset = pdev->msi_kset;
+		pci_dev_get(pdev);
+		ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
+				     "%u", entry->irq);
+		if (ret)
+			goto out_unroll;
+
+		count++;
+	}
+
+	return 0;
+
+out_unroll:
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		if (!count)
+			break;
+		kobject_del(&entry->kobj);
+		kobject_put(&entry->kobj);
+		count--;
+	}
+	return ret;
+}
+
 /**
  * msi_capability_init - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
@@ -454,6 +548,13 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 		return ret;
 	}
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		msi_mask_irq(entry, mask, ~mask);
+		free_msi_irqs(dev);
+		return ret;
+	}
+
 	/* Set MSI enabled bits	 */
 	pci_intx_for_msi(dev, 0);
 	msi_set_enable(dev, pos, 1);
@@ -574,6 +675,12 @@ static int msix_capability_init(struct pci_dev *dev,
 
 	msix_program_entries(dev, entries);
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		ret = 0;
+		goto error;
+	}
+
 	/* Set MSI-X enabled bits and unmask the function */
 	pci_intx_for_msi(dev, 0);
 	dev->msix_enabled = 1;
@@ -732,6 +839,8 @@ void pci_disable_msi(struct pci_dev *dev)
 
 	pci_msi_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msi);
 
@@ -830,6 +939,8 @@ void pci_disable_msix(struct pci_dev *dev)
 
 	pci_msix_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msix);
 
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 05acced439a3..ce93a341337d 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -1,6 +1,7 @@
 #ifndef LINUX_MSI_H
 #define LINUX_MSI_H
 
+#include <linux/kobject.h>
 #include <linux/list.h>
 
 struct msi_msg {
@@ -44,6 +45,8 @@ struct msi_desc {
 
 	/* Last set MSI message */
 	struct msi_msg msg;
+
+	struct kobject kobj;
 };
 
 /*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7cda65b5f798..84225c756bd1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -336,6 +336,7 @@ struct pci_dev {
 	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
 #ifdef CONFIG_PCI_MSI
 	struct list_head msi_list;
+	struct kset *msi_kset;
 #endif
 	struct pci_vpd *vpd;
 #ifdef CONFIG_PCI_ATS
-- 
cgit v1.2.3


From 69166fbf02c7a21745013f2de037bf7af26e4279 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 2 Nov 2011 14:07:15 -0600
Subject: PCI: Fix PRI and PASID consistency

These are extended capabilities, rename and move to proper
group for consistency.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/ats.c        | 20 ++++++++++----------
 include/linux/pci_regs.h |  4 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 7ec56fb0bd78..831e1920386c 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -174,7 +174,7 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
 	u32 max_requests;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
 
@@ -205,7 +205,7 @@ void pci_disable_pri(struct pci_dev *pdev)
 	u16 control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return;
 
@@ -226,7 +226,7 @@ bool pci_pri_enabled(struct pci_dev *pdev)
 	u16 control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return false;
 
@@ -248,7 +248,7 @@ int pci_reset_pri(struct pci_dev *pdev)
 	u16 control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
 
@@ -281,7 +281,7 @@ bool pci_pri_stopped(struct pci_dev *pdev)
 	u16 control, status;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return true;
 
@@ -310,7 +310,7 @@ int pci_pri_status(struct pci_dev *pdev)
 	u16 status, control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
 
@@ -341,7 +341,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 	u16 control, supported;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
 
@@ -375,7 +375,7 @@ void pci_disable_pasid(struct pci_dev *pdev)
 	u16 control = 0;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return;
 
@@ -399,7 +399,7 @@ int pci_pasid_features(struct pci_dev *pdev)
 	u16 supported;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
 
@@ -425,7 +425,7 @@ int pci_max_pasids(struct pci_dev *pdev)
 	u16 supported;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
 
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index b5d9657f3100..090d3a9f5b26 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -537,7 +537,9 @@
 #define PCI_EXT_CAP_ID_ARI	14
 #define PCI_EXT_CAP_ID_ATS	15
 #define PCI_EXT_CAP_ID_SRIOV	16
+#define PCI_EXT_CAP_ID_PRI	19
 #define PCI_EXT_CAP_ID_LTR	24
+#define PCI_EXT_CAP_ID_PASID	27
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
@@ -664,7 +666,6 @@
 #define  PCI_ATS_MIN_STU	12	/* shift of minimum STU block */
 
 /* Page Request Interface */
-#define PCI_PRI_CAP		0x13    /* PRI capability ID */
 #define PCI_PRI_CONTROL_OFF	0x04	/* Offset of control register */
 #define PCI_PRI_STATUS_OFF	0x06	/* Offset of status register */
 #define PCI_PRI_ENABLE		0x0001	/* Enable mask */
@@ -676,7 +677,6 @@
 #define PCI_PRI_ALLOC_REQ_OFF	0x0c	/* Cap offset for max reqs allowed */
 
 /* PASID capability */
-#define PCI_PASID_CAP		0x1b    /* PASID capability ID */
 #define PCI_PASID_CAP_OFF	0x04    /* PASID feature register */
 #define PCI_PASID_CONTROL_OFF   0x06    /* PASID control register */
 #define PCI_PASID_ENABLE	0x01	/* Enable/Supported bit */
-- 
cgit v1.2.3


From 3c076351c4027a56d5005a39a0b518a4ba393ce2 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Thu, 10 Nov 2011 16:38:33 -0500
Subject: PCI: Rework ASPM disable code

Right now we forcibly clear ASPM state on all devices if the BIOS indicates
that the feature isn't supported. Based on the Microsoft presentation
"PCI Express In Depth for Windows Vista and Beyond", I'm starting to think
that this may be an error. The implication is that unless the platform
grants full control via _OSC, Windows will not touch any PCIe features -
including ASPM. In that case clearing ASPM state would be an error unless
the platform has granted us that control.

This patch reworks the ASPM disabling code such that the actual clearing
of state is triggered by a successful handoff of PCIe control to the OS.
The general ASPM code undergoes some changes in order to ensure that the
ability to clear the bits isn't overridden by ASPM having already been
disabled. Further, this theoretically now allows for situations where
only a subset of PCIe roots hand over control, leaving the others in the
BIOS state.

It's difficult to know for sure that this is the right thing to do -
there's zero public documentation on the interaction between all of these
components. But enough vendors enable ASPM on platforms and then set this
bit that it seems likely that they're expecting the OS to leave them alone.

Measured to save around 5W on an idle Thinkpad X220.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/acpi/pci_root.c  |  7 ++++++
 drivers/pci/pci-acpi.c   |  1 -
 drivers/pci/pcie/aspm.c  | 58 ++++++++++++++++++++++++++++++------------------
 include/linux/pci-aspm.h |  4 ++--
 4 files changed, 46 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 2672c798272f..7aff6312ce7c 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -596,6 +596,13 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 		if (ACPI_SUCCESS(status)) {
 			dev_info(root->bus->bridge,
 				"ACPI _OSC control (0x%02x) granted\n", flags);
+			if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
+				/*
+				 * We have ASPM control, but the FADT indicates
+				 * that it's unsupported. Clear it.
+				 */
+				pcie_clear_aspm(root->bus);
+			}
 		} else {
 			dev_info(root->bus->bridge,
 				"ACPI _OSC request failed (%s), "
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 4ecb6408b0d6..c8e75851a314 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -395,7 +395,6 @@ static int __init acpi_pci_init(void)
 
 	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
 		printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
-		pcie_clear_aspm();
 		pcie_no_aspm();
 	}
 
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index cbfbab18be91..1cfbf228fbb1 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -68,7 +68,7 @@ struct pcie_link_state {
 	struct aspm_latency acceptable[8];
 };
 
-static int aspm_disabled, aspm_force, aspm_clear_state;
+static int aspm_disabled, aspm_force;
 static bool aspm_support_enabled = true;
 static DEFINE_MUTEX(aspm_lock);
 static LIST_HEAD(link_list);
@@ -500,9 +500,6 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
 	int pos;
 	u32 reg32;
 
-	if (aspm_clear_state)
-		return -EINVAL;
-
 	/*
 	 * Some functions in a slot might not all be PCIe functions,
 	 * very strange. Disable ASPM for the whole slot
@@ -574,9 +571,6 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
 	    pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
 		return;
 
-	if (aspm_disabled && !aspm_clear_state)
-		return;
-
 	/* VIA has a strange chipset, root port is under a bridge */
 	if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT &&
 	    pdev->bus->self)
@@ -608,7 +602,7 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
 	 * the BIOS's expectation, we'll do so once pci_enable_device() is
 	 * called.
 	 */
-	if (aspm_policy != POLICY_POWERSAVE || aspm_clear_state) {
+	if (aspm_policy != POLICY_POWERSAVE) {
 		pcie_config_aspm_path(link);
 		pcie_set_clkpm(link, policy_to_clkpm_state(link));
 	}
@@ -649,8 +643,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
 	struct pci_dev *parent = pdev->bus->self;
 	struct pcie_link_state *link, *root, *parent_link;
 
-	if ((aspm_disabled && !aspm_clear_state) || !pci_is_pcie(pdev) ||
-	    !parent || !parent->link_state)
+	if (!pci_is_pcie(pdev) || !parent || !parent->link_state)
 		return;
 	if ((parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT) &&
 	    (parent->pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
@@ -734,13 +727,18 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
  * pci_disable_link_state - disable pci device's link state, so the link will
  * never enter specific states
  */
-static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
+static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem,
+				     bool force)
 {
 	struct pci_dev *parent = pdev->bus->self;
 	struct pcie_link_state *link;
 
-	if (aspm_disabled || !pci_is_pcie(pdev))
+	if (aspm_disabled && !force)
+		return;
+
+	if (!pci_is_pcie(pdev))
 		return;
+
 	if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
 	    pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)
 		parent = pdev;
@@ -768,16 +766,31 @@ static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
 
 void pci_disable_link_state_locked(struct pci_dev *pdev, int state)
 {
-	__pci_disable_link_state(pdev, state, false);
+	__pci_disable_link_state(pdev, state, false, false);
 }
 EXPORT_SYMBOL(pci_disable_link_state_locked);
 
 void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
-	__pci_disable_link_state(pdev, state, true);
+	__pci_disable_link_state(pdev, state, true, false);
 }
 EXPORT_SYMBOL(pci_disable_link_state);
 
+void pcie_clear_aspm(struct pci_bus *bus)
+{
+	struct pci_dev *child;
+
+	/*
+	 * Clear any ASPM setup that the firmware has carried out on this bus
+	 */
+	list_for_each_entry(child, &bus->devices, bus_list) {
+		__pci_disable_link_state(child, PCIE_LINK_STATE_L0S |
+					 PCIE_LINK_STATE_L1 |
+					 PCIE_LINK_STATE_CLKPM,
+					 false, true);
+	}
+}
+
 static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
 {
 	int i;
@@ -935,6 +948,7 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
 static int __init pcie_aspm_disable(char *str)
 {
 	if (!strcmp(str, "off")) {
+		aspm_policy = POLICY_DEFAULT;
 		aspm_disabled = 1;
 		aspm_support_enabled = false;
 		printk(KERN_INFO "PCIe ASPM is disabled\n");
@@ -947,16 +961,18 @@ static int __init pcie_aspm_disable(char *str)
 
 __setup("pcie_aspm=", pcie_aspm_disable);
 
-void pcie_clear_aspm(void)
-{
-	if (!aspm_force)
-		aspm_clear_state = 1;
-}
-
 void pcie_no_aspm(void)
 {
-	if (!aspm_force)
+	/*
+	 * Disabling ASPM is intended to prevent the kernel from modifying
+	 * existing hardware state, not to clear existing state. To that end:
+	 * (a) set policy to POLICY_DEFAULT in order to avoid changing state
+	 * (b) prevent userspace from changing policy
+	 */
+	if (!aspm_force) {
+		aspm_policy = POLICY_DEFAULT;
 		aspm_disabled = 1;
+	}
 }
 
 /**
diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h
index 7cea7b6c1413..c8320144fe79 100644
--- a/include/linux/pci-aspm.h
+++ b/include/linux/pci-aspm.h
@@ -29,7 +29,7 @@ extern void pcie_aspm_pm_state_change(struct pci_dev *pdev);
 extern void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
 extern void pci_disable_link_state(struct pci_dev *pdev, int state);
 extern void pci_disable_link_state_locked(struct pci_dev *pdev, int state);
-extern void pcie_clear_aspm(void);
+extern void pcie_clear_aspm(struct pci_bus *bus);
 extern void pcie_no_aspm(void);
 #else
 static inline void pcie_aspm_init_link_state(struct pci_dev *pdev)
@@ -47,7 +47,7 @@ static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
 static inline void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
 }
-static inline void pcie_clear_aspm(void)
+static inline void pcie_clear_aspm(struct pci_bus *bus)
 {
 }
 static inline void pcie_no_aspm(void)
-- 
cgit v1.2.3


From d90116ea38f7768dac0349f01ffbc2663d63b7e9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 6 Nov 2011 23:11:28 +0100
Subject: PCI/ACPI: Make acpiphp ignore root bridges using SHPC native hotplug

If the kernel has requested control of the SHPC native hotplug
feature for a given root bridge, the acpiphp driver should not try
to handle that root bridge and it should leave it to shpchp.
Failing to do so causes problems to happen if shpchp is loaded
and unloaded before loading acpiphp (ACPI-based hotplug won't work
in that case anyway).

To address this issue make find_root_bridges() ignore PCI root
bridges with SHPC native hotplug enabled and make add_bridge()
return error code if SHPC native hotplug is enabled for the given
root bridge.  This causes acpiphp to refuse to load if SHPC native
hotplug is enabled for all root bridges and to refuse binding to
the root bridges with SHPC native hotplug enabled.

Reviewed-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/hotplug/acpiphp_glue.c | 4 ++--
 include/linux/acpi.h               | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index fce1c54a0c8d..ba43c037de80 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -467,7 +467,7 @@ static int add_bridge(acpi_handle handle)
 	 * granted by the BIOS for it.
 	 */
 	root = acpi_pci_find_root(handle);
-	if (root && (root->osc_control_set & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL))
+	if (root && (root->osc_control_set & OSC_PCI_NATIVE_HOTPLUG))
 		return -ENODEV;
 
 	/* if the bridge doesn't have _STA, we assume it is always there */
@@ -1395,7 +1395,7 @@ find_root_bridges(acpi_handle handle, u32 lvl, void *context, void **rv)
 	if (!root)
 		return AE_OK;
 
-	if (root->osc_control_set & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL)
+	if (root->osc_control_set & OSC_PCI_NATIVE_HOTPLUG)
 		return AE_OK;
 
 	(*count)++;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6001b4da39dd..627a3a42e4d8 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -302,6 +302,10 @@ extern bool osc_sb_apei_support_acked;
 				OSC_PCI_EXPRESS_PME_CONTROL |		\
 				OSC_PCI_EXPRESS_AER_CONTROL |		\
 				OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
+
+#define OSC_PCI_NATIVE_HOTPLUG	(OSC_PCI_EXPRESS_NATIVE_HP_CONTROL |	\
+				OSC_SHPC_NATIVE_HP_CONTROL)
+
 extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
 					     u32 *mask, u32 req);
 extern void acpi_early_init(void);
-- 
cgit v1.2.3


From 91f57d5e1be3db1e079c8696f1eab214f1c7922d Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 11 Nov 2011 10:07:36 -0700
Subject: PCI: More PRI/PASID cleanup

More consistency cleanups.  Drop the _OFF, separate and indent
CTRL/CAP/STATUS bit definitions.  This helped find the previous
mis-use of bit 0 in the PASID capability register.

Reviewed-by: Joerg Roedel <joerg.roedel@amd.com>
Tested-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/ats.c        | 69 ++++++++++++++++++++++++------------------------
 include/linux/pci_regs.h | 30 +++++++++++----------
 2 files changed, 51 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 8e95a123d37a..2df49af6cc90 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -178,17 +178,18 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-	pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF,  &status);
-	if ((control & PCI_PRI_ENABLE) || !(status & PCI_PRI_STATUS_STOPPED))
+	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+	pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
+	if ((control & PCI_PRI_CTRL_ENABLE) ||
+	    !(status & PCI_PRI_STATUS_STOPPED))
 		return -EBUSY;
 
-	pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ_OFF, &max_requests);
+	pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests);
 	reqs = min(max_requests, reqs);
-	pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ_OFF, reqs);
+	pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs);
 
-	control |= PCI_PRI_ENABLE;
-	pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
+	control |= PCI_PRI_CTRL_ENABLE;
+	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 
 	return 0;
 }
@@ -209,9 +210,9 @@ void pci_disable_pri(struct pci_dev *pdev)
 	if (!pos)
 		return;
 
-	pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-	control &= ~PCI_PRI_ENABLE;
-	pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
+	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+	control &= ~PCI_PRI_CTRL_ENABLE;
+	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 }
 EXPORT_SYMBOL_GPL(pci_disable_pri);
 
@@ -230,9 +231,9 @@ bool pci_pri_enabled(struct pci_dev *pdev)
 	if (!pos)
 		return false;
 
-	pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
+	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
 
-	return (control & PCI_PRI_ENABLE) ? true : false;
+	return (control & PCI_PRI_CTRL_ENABLE) ? true : false;
 }
 EXPORT_SYMBOL_GPL(pci_pri_enabled);
 
@@ -252,13 +253,13 @@ int pci_reset_pri(struct pci_dev *pdev)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-	if (control & PCI_PRI_ENABLE)
+	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+	if (control & PCI_PRI_CTRL_ENABLE)
 		return -EBUSY;
 
-	control |= PCI_PRI_RESET;
+	control |= PCI_PRI_CTRL_RESET;
 
-	pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
+	pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 
 	return 0;
 }
@@ -285,10 +286,10 @@ bool pci_pri_stopped(struct pci_dev *pdev)
 	if (!pos)
 		return true;
 
-	pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-	pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF,  &status);
+	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+	pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
 
-	if (control & PCI_PRI_ENABLE)
+	if (control & PCI_PRI_CTRL_ENABLE)
 		return false;
 
 	return (status & PCI_PRI_STATUS_STOPPED) ? true : false;
@@ -314,11 +315,11 @@ int pci_pri_status(struct pci_dev *pdev)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-	pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF,  &status);
+	pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+	pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
 
 	/* Stopped bit is undefined when enable == 1, so clear it */
-	if (control & PCI_PRI_ENABLE)
+	if (control & PCI_PRI_CTRL_ENABLE)
 		status &= ~PCI_PRI_STATUS_STOPPED;
 
 	return status;
@@ -345,21 +346,21 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, &control);
-	pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF,     &supported);
+	pci_read_config_word(pdev, pos + PCI_PASID_CTRL, &control);
+	pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
 
-	if (control & PCI_PASID_ENABLE)
+	if (control & PCI_PASID_CTRL_ENABLE)
 		return -EINVAL;
 
-	supported &= PCI_PASID_EXEC | PCI_PASID_PRIV;
+	supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
 
 	/* User wants to enable anything unsupported? */
 	if ((supported & features) != features)
 		return -EINVAL;
 
-	control = PCI_PASID_ENABLE | features;
+	control = PCI_PASID_CTRL_ENABLE | features;
 
-	pci_write_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, control);
+	pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
 
 	return 0;
 }
@@ -379,7 +380,7 @@ void pci_disable_pasid(struct pci_dev *pdev)
 	if (!pos)
 		return;
 
-	pci_write_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, control);
+	pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
 }
 EXPORT_SYMBOL_GPL(pci_disable_pasid);
 
@@ -390,8 +391,8 @@ EXPORT_SYMBOL_GPL(pci_disable_pasid);
  * Returns a negative value when no PASI capability is present.
  * Otherwise is returns a bitmask with supported features. Current
  * features reported are:
- * PCI_PASID_EXEC - Execute permission supported
- * PCI_PASID_PRIV - Priviledged mode supported
+ * PCI_PASID_CAP_EXEC - Execute permission supported
+ * PCI_PASID_CAP_PRIV - Priviledged mode supported
  */
 int pci_pasid_features(struct pci_dev *pdev)
 {
@@ -402,9 +403,9 @@ int pci_pasid_features(struct pci_dev *pdev)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported);
+	pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
 
-	supported &= PCI_PASID_EXEC | PCI_PASID_PRIV;
+	supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
 
 	return supported;
 }
@@ -428,7 +429,7 @@ int pci_max_pasids(struct pci_dev *pdev)
 	if (!pos)
 		return -EINVAL;
 
-	pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported);
+	pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
 
 	supported = (supported & PASID_NUMBER_MASK) >> PASID_NUMBER_SHIFT;
 
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 090d3a9f5b26..28fe380cb19d 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -666,22 +666,24 @@
 #define  PCI_ATS_MIN_STU	12	/* shift of minimum STU block */
 
 /* Page Request Interface */
-#define PCI_PRI_CONTROL_OFF	0x04	/* Offset of control register */
-#define PCI_PRI_STATUS_OFF	0x06	/* Offset of status register */
-#define PCI_PRI_ENABLE		0x0001	/* Enable mask */
-#define PCI_PRI_RESET		0x0002	/* Reset bit mask */
-#define PCI_PRI_STATUS_RF	0x0001  /* Request Failure */
-#define PCI_PRI_STATUS_UPRGI	0x0002  /* Unexpected PRG index */
-#define PCI_PRI_STATUS_STOPPED	0x0100  /* PRI Stopped */
-#define PCI_PRI_MAX_REQ_OFF	0x08	/* Cap offset for max reqs supported */
-#define PCI_PRI_ALLOC_REQ_OFF	0x0c	/* Cap offset for max reqs allowed */
+#define PCI_PRI_CTRL		0x04	/* PRI control register */
+#define  PCI_PRI_CTRL_ENABLE	0x01	/* Enable */
+#define  PCI_PRI_CTRL_RESET	0x02	/* Reset */
+#define PCI_PRI_STATUS		0x06	/* PRI status register */
+#define  PCI_PRI_STATUS_RF	0x001	/* Response Failure */
+#define  PCI_PRI_STATUS_UPRGI	0x002	/* Unexpected PRG index */
+#define  PCI_PRI_STATUS_STOPPED	0x100	/* PRI Stopped */
+#define PCI_PRI_MAX_REQ		0x08	/* PRI max reqs supported */
+#define PCI_PRI_ALLOC_REQ	0x0c	/* PRI max reqs allowed */
 
 /* PASID capability */
-#define PCI_PASID_CAP_OFF	0x04    /* PASID feature register */
-#define PCI_PASID_CONTROL_OFF   0x06    /* PASID control register */
-#define PCI_PASID_ENABLE	0x01	/* Enable/Supported bit */
-#define PCI_PASID_EXEC		0x02	/* Exec permissions Enable/Supported */
-#define PCI_PASID_PRIV		0x04	/* Priviledge Mode Enable/Support */
+#define PCI_PASID_CAP		0x04    /* PASID feature register */
+#define  PCI_PASID_CAP_EXEC	0x02	/* Exec permissions Supported */
+#define  PCI_PASID_CAP_PRIV	0x04	/* Priviledge Mode Supported */
+#define PCI_PASID_CTRL		0x06    /* PASID control register */
+#define  PCI_PASID_CTRL_ENABLE	0x01	/* Enable bit */
+#define  PCI_PASID_CTRL_EXEC	0x02	/* Exec permissions Enable */
+#define  PCI_PASID_CTRL_PRIV	0x04	/* Priviledge Mode Enable */
 
 /* Single Root I/O Virtualization */
 #define PCI_SRIOV_CAP		0x04	/* SR-IOV Capabilities */
-- 
cgit v1.2.3


From 54c29c635ae91f5d75ced7bffeaa77ba37ca02bb Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Tue, 29 Nov 2011 17:05:11 +0100
Subject: mm, x86: Remove debug_pagealloc_enabled

When (no)bootmem finishes operation, it passes pages to the buddy
allocator. Since debug_pagealloc_enabled is not set, we do not
protect those pages, which is not what we want with
CONFIG_DEBUG_PAGEALLOC=y.

To fix this, remove debug_pagealloc_enabled. That variable was
introduced by commit 12d6f21e "x86: do not PSE on
CONFIG_DEBUG_PAGEALLOC=y" to get more CPA (change page
attribute) code testing. But currently we have CONFIG_CPA_DEBUG,
which tests CPA.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/1322582711-14571-1-git-send-email-sgruszka@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pageattr.c |  6 ------
 include/linux/mm.h     | 10 ----------
 init/main.c            |  5 -----
 mm/debug-pagealloc.c   |  3 ---
 4 files changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f9e526742fa1..5031eefa051f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1333,12 +1333,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 					   numpages * PAGE_SIZE);
 	}
 
-	/*
-	 * If page allocator is not up yet then do not call c_p_a():
-	 */
-	if (!debug_pagealloc_enabled)
-		return;
-
 	/*
 	 * The return value is ignored as the calls cannot fail.
 	 * Large pages for identity mappings are not used at boot time
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3dc3a8c2c485..0a22db144753 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1537,23 +1537,13 @@ static inline void vm_stat_account(struct mm_struct *mm,
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
-extern int debug_pagealloc_enabled;
-
 extern void kernel_map_pages(struct page *page, int numpages, int enable);
-
-static inline void enable_debug_pagealloc(void)
-{
-	debug_pagealloc_enabled = 1;
-}
 #ifdef CONFIG_HIBERNATION
 extern bool kernel_page_present(struct page *page);
 #endif /* CONFIG_HIBERNATION */
 #else
 static inline void
 kernel_map_pages(struct page *page, int numpages, int enable) {}
-static inline void enable_debug_pagealloc(void)
-{
-}
 #ifdef CONFIG_HIBERNATION
 static inline bool kernel_page_present(struct page *page) { return true; }
 #endif /* CONFIG_HIBERNATION */
diff --git a/init/main.c b/init/main.c
index 217ed23e9487..99c4ba30ba7e 100644
--- a/init/main.c
+++ b/init/main.c
@@ -282,10 +282,6 @@ static int __init unknown_bootoption(char *param, char *val)
 	return 0;
 }
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-int __read_mostly debug_pagealloc_enabled = 0;
-#endif
-
 static int __init init_setup(char *str)
 {
 	unsigned int i;
@@ -597,7 +593,6 @@ asmlinkage void __init start_kernel(void)
 	}
 #endif
 	page_cgroup_init();
-	enable_debug_pagealloc();
 	debug_objects_mem_init();
 	kmemleak_init();
 	setup_per_cpu_pageset();
diff --git a/mm/debug-pagealloc.c b/mm/debug-pagealloc.c
index 7cea557407f4..789ff70c8a4a 100644
--- a/mm/debug-pagealloc.c
+++ b/mm/debug-pagealloc.c
@@ -95,9 +95,6 @@ static void unpoison_pages(struct page *page, int n)
 
 void kernel_map_pages(struct page *page, int numpages, int enable)
 {
-	if (!debug_pagealloc_enabled)
-		return;
-
 	if (enable)
 		unpoison_pages(page, numpages);
 	else
-- 
cgit v1.2.3


From f21ffe9f6da6d3a69c518b7345c198d48d941c34 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 11 Aug 2011 16:50:56 -0400
Subject: swiotlb: Expose swiotlb_nr_tbl function to modules

This serves as a mechanism for modules to detect whether SWIOTLB
is enabled or not. We also fix the spelling - it was swioltb
instead of swiotlb.

CC: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
[v1: Ripped out swiotlb_enabled]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/xen/swiotlb-xen.c | 2 +-
 include/linux/swiotlb.h   | 2 +-
 lib/swiotlb.c             | 5 +++--
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 8e964b91c447..4864e5d72e72 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -153,7 +153,7 @@ void __init xen_swiotlb_init(int verbose)
 	char *m = NULL;
 	unsigned int repeat = 3;
 
-	nr_tbl = swioltb_nr_tbl();
+	nr_tbl = swiotlb_nr_tbl();
 	if (nr_tbl)
 		xen_io_tlb_nslabs = nr_tbl;
 	else {
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 445702c60d04..e872526fdc5f 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -24,7 +24,7 @@ extern int swiotlb_force;
 
 extern void swiotlb_init(int verbose);
 extern void swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
-extern unsigned long swioltb_nr_tbl(void);
+extern unsigned long swiotlb_nr_tbl(void);
 
 /*
  * Enumeration for sync targets
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 99093b396145..058935ef3975 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -110,11 +110,11 @@ setup_io_tlb_npages(char *str)
 __setup("swiotlb=", setup_io_tlb_npages);
 /* make io_tlb_overflow tunable too? */
 
-unsigned long swioltb_nr_tbl(void)
+unsigned long swiotlb_nr_tbl(void)
 {
 	return io_tlb_nslabs;
 }
-
+EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
 /* Note that this doesn't work with highmem page */
 static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 				      volatile void *address)
@@ -321,6 +321,7 @@ void __init swiotlb_free(void)
 		free_bootmem_late(__pa(io_tlb_start),
 				  PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	}
+	io_tlb_nslabs = 0;
 }
 
 static int is_swiotlb_buffer(phys_addr_t paddr)
-- 
cgit v1.2.3


From 7710ec36b6f516e026f9e29e50e67d2547c2a79b Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Fri, 25 Nov 2011 18:44:05 -0500
Subject: svcrpc: make svc_delete_xprt static

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_xprt.h | 1 -
 net/sunrpc/svc_xprt.c           | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 8620f79658d4..5488e593160a 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -118,7 +118,6 @@ void	svc_xprt_received(struct svc_xprt *);
 void	svc_xprt_put(struct svc_xprt *xprt);
 void	svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
 void	svc_close_xprt(struct svc_xprt *xprt);
-void	svc_delete_xprt(struct svc_xprt *xprt);
 int	svc_port_is_privileged(struct sockaddr *sin);
 int	svc_print_xprts(char *buf, int maxlen);
 struct	svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 447cd0eb415c..8046c6d1f9c2 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -22,6 +22,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static void svc_age_temp_xprts(unsigned long closure);
+static void svc_delete_xprt(struct svc_xprt *xprt);
 
 /* apparently the "standard" is that clients close
  * idle connections after 5 minutes, servers after
@@ -878,7 +879,7 @@ static void call_xpt_users(struct svc_xprt *xprt)
 /*
  * Remove a dead transport
  */
-void svc_delete_xprt(struct svc_xprt *xprt)
+static void svc_delete_xprt(struct svc_xprt *xprt)
 {
 	struct svc_serv	*serv = xprt->xpt_server;
 	struct svc_deferred_req *dr;
-- 
cgit v1.2.3


From 2fefb8a09e7ed251ae8996e0c69066e74c5aa560 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 29 Nov 2011 11:35:35 -0500
Subject: svcrpc: destroy server sockets all at once

There's no reason I can see that we need to call sv_shutdown between
closing the two lists of sockets.

Cc: stable@kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svcsock.h |  2 +-
 net/sunrpc/svc.c               |  7 +------
 net/sunrpc/svc_xprt.c          | 11 ++++++++++-
 3 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 85c50b40759d..c84e9741cb2a 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -34,7 +34,7 @@ struct svc_sock {
 /*
  * Function prototypes.
  */
-void		svc_close_all(struct list_head *);
+void		svc_close_all(struct svc_serv *);
 int		svc_recv(struct svc_rqst *, long);
 int		svc_send(struct svc_rqst *);
 void		svc_drop(struct svc_rqst *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6e038884ae0c..60babf0a9847 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -528,16 +528,11 @@ svc_destroy(struct svc_serv *serv)
 
 	del_timer_sync(&serv->sv_temptimer);
 
-	svc_close_all(&serv->sv_tempsocks);
+	svc_close_all(serv);
 
 	if (serv->sv_shutdown)
 		serv->sv_shutdown(serv);
 
-	svc_close_all(&serv->sv_permsocks);
-
-	BUG_ON(!list_empty(&serv->sv_permsocks));
-	BUG_ON(!list_empty(&serv->sv_tempsocks));
-
 	cache_clean_deferred(serv);
 
 	if (svc_serv_is_pooled(serv))
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 8046c6d1f9c2..099ddf99d2a1 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -929,7 +929,7 @@ void svc_close_xprt(struct svc_xprt *xprt)
 }
 EXPORT_SYMBOL_GPL(svc_close_xprt);
 
-void svc_close_all(struct list_head *xprt_list)
+static void svc_close_list(struct list_head *xprt_list)
 {
 	struct svc_xprt *xprt;
 	struct svc_xprt *tmp;
@@ -947,6 +947,15 @@ void svc_close_all(struct list_head *xprt_list)
 	}
 }
 
+void svc_close_all(struct svc_serv *serv)
+{
+	svc_close_list(&serv->sv_tempsocks);
+	svc_close_list(&serv->sv_permsocks);
+	BUG_ON(!list_empty(&serv->sv_permsocks));
+	BUG_ON(!list_empty(&serv->sv_tempsocks));
+
+}
+
 /*
  * Handle defer and revisit of requests
  */
-- 
cgit v1.2.3


From bd4620ddf6d6eb3d9e7d073ad601fa4299d46ba9 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Tue, 6 Dec 2011 14:19:10 +0300
Subject: SUNRPC: create svc_xprt in proper network namespace

This patch makes svc_xprt inherit network namespace link from its socket.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_xprt.h          | 2 +-
 net/sunrpc/svc_xprt.c                    | 6 +++---
 net/sunrpc/svcsock.c                     | 8 +++++---
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +-
 4 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 5488e593160a..dfa900948af7 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -109,7 +109,7 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u
 
 int	svc_reg_xprt_class(struct svc_xprt_class *);
 void	svc_unreg_xprt_class(struct svc_xprt_class *);
-void	svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
+void	svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
 		      struct svc_serv *);
 int	svc_create_xprt(struct svc_serv *, const char *, struct net *,
 			const int, const unsigned short, int);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 0d80c064e634..0633c7e2fe63 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -148,8 +148,8 @@ EXPORT_SYMBOL_GPL(svc_xprt_put);
  * Called by transport drivers to initialize the transport independent
  * portion of the transport instance.
  */
-void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
-		   struct svc_serv *serv)
+void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
+		   struct svc_xprt *xprt, struct svc_serv *serv)
 {
 	memset(xprt, 0, sizeof(*xprt));
 	xprt->xpt_class = xcl;
@@ -164,7 +164,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
 	spin_lock_init(&xprt->xpt_lock);
 	set_bit(XPT_BUSY, &xprt->xpt_flags);
 	rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
-	xprt->xpt_net = get_net(&init_net);
+	xprt->xpt_net = get_net(net);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 71bed1c1c77a..277909e651ed 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -739,7 +739,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 {
 	int err, level, optname, one = 1;
 
-	svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
+	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
+		      &svsk->sk_xprt, serv);
 	clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
 	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
 	svsk->sk_sk->sk_write_space = svc_write_space;
@@ -1343,7 +1344,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
 {
 	struct sock	*sk = svsk->sk_sk;
 
-	svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv);
+	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class,
+		      &svsk->sk_xprt, serv);
 	set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
 	if (sk->sk_state == TCP_LISTEN) {
 		dprintk("setting up TCP socket for listening\n");
@@ -1659,7 +1661,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
 		return ERR_PTR(-ENOMEM);
 
 	xprt = &svsk->sk_xprt;
-	svc_xprt_init(&svc_tcp_bc_class, xprt, serv);
+	svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv);
 
 	serv->sv_bc_xprt = xprt;
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index ba1296d88de0..894cb42db91d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -453,7 +453,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 
 	if (!cma_xprt)
 		return NULL;
-	svc_xprt_init(&svc_rdma_class, &cma_xprt->sc_xprt, serv);
+	svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
 	INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
-- 
cgit v1.2.3


From d57f341ba08c9f34ccd45a89729e73174d4a3325 Mon Sep 17 00:00:00 2001
From: Gabor Juhos <juhosg@openwrt.org>
Date: Mon, 20 Jun 2011 19:26:11 +0200
Subject: SERIAL: AR933X: Add driver for the built-in UART of the SoC

This patch adds the driver for the built-in UART of the
Atheros AR933X SoCs.

Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
Cc: linux-mips@linux-mips.org
Cc: Kathy Giori <kgiori@qca.qualcomm.com>
Cc: "Luis R.  Rodriguez" <rodrigue@qca.qualcomm.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: linux-serial@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/2526/
Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 .../include/asm/mach-ath79/ar933x_uart_platform.h  |  18 +
 drivers/tty/serial/Kconfig                         |  23 +
 drivers/tty/serial/Makefile                        |   1 +
 drivers/tty/serial/ar933x_uart.c                   | 688 +++++++++++++++++++++
 include/linux/serial_core.h                        |   4 +
 5 files changed, 734 insertions(+)
 create mode 100644 arch/mips/include/asm/mach-ath79/ar933x_uart_platform.h
 create mode 100644 drivers/tty/serial/ar933x_uart.c

(limited to 'include/linux')

diff --git a/arch/mips/include/asm/mach-ath79/ar933x_uart_platform.h b/arch/mips/include/asm/mach-ath79/ar933x_uart_platform.h
new file mode 100644
index 000000000000..6cb30f2b7198
--- /dev/null
+++ b/arch/mips/include/asm/mach-ath79/ar933x_uart_platform.h
@@ -0,0 +1,18 @@
+/*
+ *  Platform data definition for Atheros AR933X UART
+ *
+ *  Copyright (C) 2011 Gabor Juhos <juhosg@openwrt.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ */
+
+#ifndef _AR933X_UART_PLATFORM_H
+#define _AR933X_UART_PLATFORM_H
+
+struct ar933x_uart_platform_data {
+	unsigned	uartclk;
+};
+
+#endif /* _AR933X_UART_PLATFORM_H */
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 925a1e547a83..95a0f5fe7d42 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1610,4 +1610,27 @@ config SERIAL_XILINX_PS_UART_CONSOLE
 	help
 	  Enable a Xilinx PS UART port to be the system console.
 
+config SERIAL_AR933X
+	bool "AR933X serial port support"
+	depends on SOC_AR933X
+	select SERIAL_CORE
+	help
+	  If you have an Atheros AR933X SOC based board and want to use the
+	  built-in UART of the SoC, say Y to this option.
+
+config SERIAL_AR933X_CONSOLE
+	bool "Console on AR933X serial port"
+	depends on SERIAL_AR933X=y
+	select SERIAL_CORE_CONSOLE
+	help
+	  Enable a built-in UART port of the AR933X to be the system console.
+
+config SERIAL_AR933X_NR_UARTS
+	int "Maximum number of AR933X serial ports"
+	depends on SERIAL_AR933X
+	default "2"
+	help
+	  Set this to the number of serial ports you want the driver
+	  to support.
+
 endmenu
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index e10cf5b54b6d..76811cc58591 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -94,3 +94,4 @@ obj-$(CONFIG_SERIAL_MSM_SMD)	+= msm_smd_tty.o
 obj-$(CONFIG_SERIAL_MXS_AUART) += mxs-auart.o
 obj-$(CONFIG_SERIAL_LANTIQ)	+= lantiq.o
 obj-$(CONFIG_SERIAL_XILINX_PS_UART) += xilinx_uartps.o
+obj-$(CONFIG_SERIAL_AR933X)   += ar933x_uart.o
diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c
new file mode 100644
index 000000000000..e4f60e2b87f3
--- /dev/null
+++ b/drivers/tty/serial/ar933x_uart.c
@@ -0,0 +1,688 @@
+/*
+ *  Atheros AR933X SoC built-in UART driver
+ *
+ *  Copyright (C) 2011 Gabor Juhos <juhosg@openwrt.org>
+ *
+ *  Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o.
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/sysrq.h>
+#include <linux/delay.h>
+#include <linux/platform_device.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial_core.h>
+#include <linux/serial.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+
+#include <asm/mach-ath79/ar933x_uart.h>
+#include <asm/mach-ath79/ar933x_uart_platform.h>
+
+#define DRIVER_NAME "ar933x-uart"
+
+#define AR933X_DUMMY_STATUS_RD	0x01
+
+static struct uart_driver ar933x_uart_driver;
+
+struct ar933x_uart_port {
+	struct uart_port	port;
+	unsigned int		ier;	/* shadow Interrupt Enable Register */
+};
+
+static inline unsigned int ar933x_uart_read(struct ar933x_uart_port *up,
+					    int offset)
+{
+	return readl(up->port.membase + offset);
+}
+
+static inline void ar933x_uart_write(struct ar933x_uart_port *up,
+				     int offset, unsigned int value)
+{
+	writel(value, up->port.membase + offset);
+}
+
+static inline void ar933x_uart_rmw(struct ar933x_uart_port *up,
+				  unsigned int offset,
+				  unsigned int mask,
+				  unsigned int val)
+{
+	unsigned int t;
+
+	t = ar933x_uart_read(up, offset);
+	t &= ~mask;
+	t |= val;
+	ar933x_uart_write(up, offset, t);
+}
+
+static inline void ar933x_uart_rmw_set(struct ar933x_uart_port *up,
+				       unsigned int offset,
+				       unsigned int val)
+{
+	ar933x_uart_rmw(up, offset, 0, val);
+}
+
+static inline void ar933x_uart_rmw_clear(struct ar933x_uart_port *up,
+					 unsigned int offset,
+					 unsigned int val)
+{
+	ar933x_uart_rmw(up, offset, val, 0);
+}
+
+static inline void ar933x_uart_start_tx_interrupt(struct ar933x_uart_port *up)
+{
+	up->ier |= AR933X_UART_INT_TX_EMPTY;
+	ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier);
+}
+
+static inline void ar933x_uart_stop_tx_interrupt(struct ar933x_uart_port *up)
+{
+	up->ier &= ~AR933X_UART_INT_TX_EMPTY;
+	ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier);
+}
+
+static inline void ar933x_uart_putc(struct ar933x_uart_port *up, int ch)
+{
+	unsigned int rdata;
+
+	rdata = ch & AR933X_UART_DATA_TX_RX_MASK;
+	rdata |= AR933X_UART_DATA_TX_CSR;
+	ar933x_uart_write(up, AR933X_UART_DATA_REG, rdata);
+}
+
+static unsigned int ar933x_uart_tx_empty(struct uart_port *port)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+	unsigned long flags;
+	unsigned int rdata;
+
+	spin_lock_irqsave(&up->port.lock, flags);
+	rdata = ar933x_uart_read(up, AR933X_UART_DATA_REG);
+	spin_unlock_irqrestore(&up->port.lock, flags);
+
+	return (rdata & AR933X_UART_DATA_TX_CSR) ? 0 : TIOCSER_TEMT;
+}
+
+static unsigned int ar933x_uart_get_mctrl(struct uart_port *port)
+{
+	return TIOCM_CAR;
+}
+
+static void ar933x_uart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+}
+
+static void ar933x_uart_start_tx(struct uart_port *port)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+
+	ar933x_uart_start_tx_interrupt(up);
+}
+
+static void ar933x_uart_stop_tx(struct uart_port *port)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+
+	ar933x_uart_stop_tx_interrupt(up);
+}
+
+static void ar933x_uart_stop_rx(struct uart_port *port)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+
+	up->ier &= ~AR933X_UART_INT_RX_VALID;
+	ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier);
+}
+
+static void ar933x_uart_break_ctl(struct uart_port *port, int break_state)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+	unsigned long flags;
+
+	spin_lock_irqsave(&up->port.lock, flags);
+	if (break_state == -1)
+		ar933x_uart_rmw_set(up, AR933X_UART_CS_REG,
+				    AR933X_UART_CS_TX_BREAK);
+	else
+		ar933x_uart_rmw_clear(up, AR933X_UART_CS_REG,
+				      AR933X_UART_CS_TX_BREAK);
+	spin_unlock_irqrestore(&up->port.lock, flags);
+}
+
+static void ar933x_uart_enable_ms(struct uart_port *port)
+{
+}
+
+static void ar933x_uart_set_termios(struct uart_port *port,
+				    struct ktermios *new,
+				    struct ktermios *old)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+	unsigned int cs;
+	unsigned long flags;
+	unsigned int baud, scale;
+
+	/* Only CS8 is supported */
+	new->c_cflag &= ~CSIZE;
+	new->c_cflag |= CS8;
+
+	/* Only one stop bit is supported */
+	new->c_cflag &= ~CSTOPB;
+
+	cs = 0;
+	if (new->c_cflag & PARENB) {
+		if (!(new->c_cflag & PARODD))
+			cs |= AR933X_UART_CS_PARITY_EVEN;
+		else
+			cs |= AR933X_UART_CS_PARITY_ODD;
+	} else {
+		cs |= AR933X_UART_CS_PARITY_NONE;
+	}
+
+	/* Mark/space parity is not supported */
+	new->c_cflag &= ~CMSPAR;
+
+	baud = uart_get_baud_rate(port, new, old, 0, port->uartclk / 16);
+	scale = (port->uartclk / (16 * baud)) - 1;
+
+	/*
+	 * Ok, we're now changing the port state. Do it with
+	 * interrupts disabled.
+	 */
+	spin_lock_irqsave(&up->port.lock, flags);
+
+	/* Update the per-port timeout. */
+	uart_update_timeout(port, new->c_cflag, baud);
+
+	up->port.ignore_status_mask = 0;
+
+	/* ignore all characters if CREAD is not set */
+	if ((new->c_cflag & CREAD) == 0)
+		up->port.ignore_status_mask |= AR933X_DUMMY_STATUS_RD;
+
+	ar933x_uart_write(up, AR933X_UART_CLOCK_REG,
+			  scale << AR933X_UART_CLOCK_SCALE_S | 8192);
+
+	/* setup configuration register */
+	ar933x_uart_rmw(up, AR933X_UART_CS_REG, AR933X_UART_CS_PARITY_M, cs);
+
+	/* enable host interrupt */
+	ar933x_uart_rmw_set(up, AR933X_UART_CS_REG,
+			    AR933X_UART_CS_HOST_INT_EN);
+
+	spin_unlock_irqrestore(&up->port.lock, flags);
+
+	if (tty_termios_baud_rate(new))
+		tty_termios_encode_baud_rate(new, baud, baud);
+}
+
+static void ar933x_uart_rx_chars(struct ar933x_uart_port *up)
+{
+	struct tty_struct *tty;
+	int max_count = 256;
+
+	tty = tty_port_tty_get(&up->port.state->port);
+	do {
+		unsigned int rdata;
+		unsigned char ch;
+
+		rdata = ar933x_uart_read(up, AR933X_UART_DATA_REG);
+		if ((rdata & AR933X_UART_DATA_RX_CSR) == 0)
+			break;
+
+		/* remove the character from the FIFO */
+		ar933x_uart_write(up, AR933X_UART_DATA_REG,
+				  AR933X_UART_DATA_RX_CSR);
+
+		if (!tty) {
+			/* discard the data if no tty available */
+			continue;
+		}
+
+		up->port.icount.rx++;
+		ch = rdata & AR933X_UART_DATA_TX_RX_MASK;
+
+		if (uart_handle_sysrq_char(&up->port, ch))
+			continue;
+
+		if ((up->port.ignore_status_mask & AR933X_DUMMY_STATUS_RD) == 0)
+			tty_insert_flip_char(tty, ch, TTY_NORMAL);
+	} while (max_count-- > 0);
+
+	if (tty) {
+		tty_flip_buffer_push(tty);
+		tty_kref_put(tty);
+	}
+}
+
+static void ar933x_uart_tx_chars(struct ar933x_uart_port *up)
+{
+	struct circ_buf *xmit = &up->port.state->xmit;
+	int count;
+
+	if (uart_tx_stopped(&up->port))
+		return;
+
+	count = up->port.fifosize;
+	do {
+		unsigned int rdata;
+
+		rdata = ar933x_uart_read(up, AR933X_UART_DATA_REG);
+		if ((rdata & AR933X_UART_DATA_TX_CSR) == 0)
+			break;
+
+		if (up->port.x_char) {
+			ar933x_uart_putc(up, up->port.x_char);
+			up->port.icount.tx++;
+			up->port.x_char = 0;
+			continue;
+		}
+
+		if (uart_circ_empty(xmit))
+			break;
+
+		ar933x_uart_putc(up, xmit->buf[xmit->tail]);
+
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		up->port.icount.tx++;
+	} while (--count > 0);
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(&up->port);
+
+	if (!uart_circ_empty(xmit))
+		ar933x_uart_start_tx_interrupt(up);
+}
+
+static irqreturn_t ar933x_uart_interrupt(int irq, void *dev_id)
+{
+	struct ar933x_uart_port *up = dev_id;
+	unsigned int status;
+
+	status = ar933x_uart_read(up, AR933X_UART_CS_REG);
+	if ((status & AR933X_UART_CS_HOST_INT) == 0)
+		return IRQ_NONE;
+
+	spin_lock(&up->port.lock);
+
+	status = ar933x_uart_read(up, AR933X_UART_INT_REG);
+	status &= ar933x_uart_read(up, AR933X_UART_INT_EN_REG);
+
+	if (status & AR933X_UART_INT_RX_VALID) {
+		ar933x_uart_write(up, AR933X_UART_INT_REG,
+				  AR933X_UART_INT_RX_VALID);
+		ar933x_uart_rx_chars(up);
+	}
+
+	if (status & AR933X_UART_INT_TX_EMPTY) {
+		ar933x_uart_write(up, AR933X_UART_INT_REG,
+				  AR933X_UART_INT_TX_EMPTY);
+		ar933x_uart_stop_tx_interrupt(up);
+		ar933x_uart_tx_chars(up);
+	}
+
+	spin_unlock(&up->port.lock);
+
+	return IRQ_HANDLED;
+}
+
+static int ar933x_uart_startup(struct uart_port *port)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+	unsigned long flags;
+	int ret;
+
+	ret = request_irq(up->port.irq, ar933x_uart_interrupt,
+			  up->port.irqflags, dev_name(up->port.dev), up);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&up->port.lock, flags);
+
+	/* Enable HOST interrupts */
+	ar933x_uart_rmw_set(up, AR933X_UART_CS_REG,
+			    AR933X_UART_CS_HOST_INT_EN);
+
+	/* Enable RX interrupts */
+	up->ier = AR933X_UART_INT_RX_VALID;
+	ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier);
+
+	spin_unlock_irqrestore(&up->port.lock, flags);
+
+	return 0;
+}
+
+static void ar933x_uart_shutdown(struct uart_port *port)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+
+	/* Disable all interrupts */
+	up->ier = 0;
+	ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier);
+
+	/* Disable break condition */
+	ar933x_uart_rmw_clear(up, AR933X_UART_CS_REG,
+			      AR933X_UART_CS_TX_BREAK);
+
+	free_irq(up->port.irq, up);
+}
+
+static const char *ar933x_uart_type(struct uart_port *port)
+{
+	return (port->type == PORT_AR933X) ? "AR933X UART" : NULL;
+}
+
+static void ar933x_uart_release_port(struct uart_port *port)
+{
+	/* Nothing to release ... */
+}
+
+static int ar933x_uart_request_port(struct uart_port *port)
+{
+	/* UARTs always present */
+	return 0;
+}
+
+static void ar933x_uart_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE)
+		port->type = PORT_AR933X;
+}
+
+static int ar933x_uart_verify_port(struct uart_port *port,
+				   struct serial_struct *ser)
+{
+	if (ser->type != PORT_UNKNOWN &&
+	    ser->type != PORT_AR933X)
+		return -EINVAL;
+
+	if (ser->irq < 0 || ser->irq >= NR_IRQS)
+		return -EINVAL;
+
+	if (ser->baud_base < 28800)
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct uart_ops ar933x_uart_ops = {
+	.tx_empty	= ar933x_uart_tx_empty,
+	.set_mctrl	= ar933x_uart_set_mctrl,
+	.get_mctrl	= ar933x_uart_get_mctrl,
+	.stop_tx	= ar933x_uart_stop_tx,
+	.start_tx	= ar933x_uart_start_tx,
+	.stop_rx	= ar933x_uart_stop_rx,
+	.enable_ms	= ar933x_uart_enable_ms,
+	.break_ctl	= ar933x_uart_break_ctl,
+	.startup	= ar933x_uart_startup,
+	.shutdown	= ar933x_uart_shutdown,
+	.set_termios	= ar933x_uart_set_termios,
+	.type		= ar933x_uart_type,
+	.release_port	= ar933x_uart_release_port,
+	.request_port	= ar933x_uart_request_port,
+	.config_port	= ar933x_uart_config_port,
+	.verify_port	= ar933x_uart_verify_port,
+};
+
+#ifdef CONFIG_SERIAL_AR933X_CONSOLE
+
+static struct ar933x_uart_port *
+ar933x_console_ports[CONFIG_SERIAL_AR933X_NR_UARTS];
+
+static void ar933x_uart_wait_xmitr(struct ar933x_uart_port *up)
+{
+	unsigned int status;
+	unsigned int timeout = 60000;
+
+	/* Wait up to 60ms for the character(s) to be sent. */
+	do {
+		status = ar933x_uart_read(up, AR933X_UART_DATA_REG);
+		if (--timeout == 0)
+			break;
+		udelay(1);
+	} while ((status & AR933X_UART_DATA_TX_CSR) == 0);
+}
+
+static void ar933x_uart_console_putchar(struct uart_port *port, int ch)
+{
+	struct ar933x_uart_port *up = (struct ar933x_uart_port *) port;
+
+	ar933x_uart_wait_xmitr(up);
+	ar933x_uart_putc(up, ch);
+}
+
+static void ar933x_uart_console_write(struct console *co, const char *s,
+				      unsigned int count)
+{
+	struct ar933x_uart_port *up = ar933x_console_ports[co->index];
+	unsigned long flags;
+	unsigned int int_en;
+	int locked = 1;
+
+	local_irq_save(flags);
+
+	if (up->port.sysrq)
+		locked = 0;
+	else if (oops_in_progress)
+		locked = spin_trylock(&up->port.lock);
+	else
+		spin_lock(&up->port.lock);
+
+	/*
+	 * First save the IER then disable the interrupts
+	 */
+	int_en = ar933x_uart_read(up, AR933X_UART_INT_EN_REG);
+	ar933x_uart_write(up, AR933X_UART_INT_EN_REG, 0);
+
+	uart_console_write(&up->port, s, count, ar933x_uart_console_putchar);
+
+	/*
+	 * Finally, wait for transmitter to become empty
+	 * and restore the IER
+	 */
+	ar933x_uart_wait_xmitr(up);
+	ar933x_uart_write(up, AR933X_UART_INT_EN_REG, int_en);
+
+	ar933x_uart_write(up, AR933X_UART_INT_REG, AR933X_UART_INT_ALLINTS);
+
+	if (locked)
+		spin_unlock(&up->port.lock);
+
+	local_irq_restore(flags);
+}
+
+static int ar933x_uart_console_setup(struct console *co, char *options)
+{
+	struct ar933x_uart_port *up;
+	int baud = 115200;
+	int bits = 8;
+	int parity = 'n';
+	int flow = 'n';
+
+	if (co->index < 0 || co->index >= CONFIG_SERIAL_AR933X_NR_UARTS)
+		return -EINVAL;
+
+	up = ar933x_console_ports[co->index];
+	if (!up)
+		return -ENODEV;
+
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+
+	return uart_set_options(&up->port, co, baud, parity, bits, flow);
+}
+
+static struct console ar933x_uart_console = {
+	.name		= "ttyATH",
+	.write		= ar933x_uart_console_write,
+	.device		= uart_console_device,
+	.setup		= ar933x_uart_console_setup,
+	.flags		= CON_PRINTBUFFER,
+	.index		= -1,
+	.data		= &ar933x_uart_driver,
+};
+
+static void ar933x_uart_add_console_port(struct ar933x_uart_port *up)
+{
+	ar933x_console_ports[up->port.line] = up;
+}
+
+#define AR933X_SERIAL_CONSOLE	(&ar933x_uart_console)
+
+#else
+
+static inline void ar933x_uart_add_console_port(struct ar933x_uart_port *up) {}
+
+#define AR933X_SERIAL_CONSOLE	NULL
+
+#endif /* CONFIG_SERIAL_AR933X_CONSOLE */
+
+static struct uart_driver ar933x_uart_driver = {
+	.owner		= THIS_MODULE,
+	.driver_name	= DRIVER_NAME,
+	.dev_name	= "ttyATH",
+	.nr		= CONFIG_SERIAL_AR933X_NR_UARTS,
+	.cons		= AR933X_SERIAL_CONSOLE,
+};
+
+static int __devinit ar933x_uart_probe(struct platform_device *pdev)
+{
+	struct ar933x_uart_platform_data *pdata;
+	struct ar933x_uart_port *up;
+	struct uart_port *port;
+	struct resource *mem_res;
+	struct resource *irq_res;
+	int id;
+	int ret;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata)
+		return -EINVAL;
+
+	id = pdev->id;
+	if (id == -1)
+		id = 0;
+	/* valid lines are 0 .. CONFIG_SERIAL_AR933X_NR_UARTS - 1 */
+	if (id < 0 || id >= CONFIG_SERIAL_AR933X_NR_UARTS)
+		return -EINVAL;
+
+	mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem_res) {
+		dev_err(&pdev->dev, "no MEM resource\n");
+		return -EINVAL;
+	}
+
+	irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!irq_res) {
+		dev_err(&pdev->dev, "no IRQ resource\n");
+		return -EINVAL;
+	}
+
+	up = kzalloc(sizeof(struct ar933x_uart_port), GFP_KERNEL);
+	if (!up)
+		return -ENOMEM;
+
+	port = &up->port;
+	port->mapbase = mem_res->start;
+
+	port->membase = ioremap(mem_res->start, AR933X_UART_REGS_SIZE);
+	if (!port->membase) {
+		ret = -ENOMEM;
+		goto err_free_up;
+	}
+
+	port->line = id;
+	port->irq = irq_res->start;
+	port->dev = &pdev->dev;
+	port->type = PORT_AR933X;
+	port->iotype = UPIO_MEM32;
+	port->uartclk = pdata->uartclk;
+
+	port->regshift = 2;
+	port->fifosize = AR933X_UART_FIFO_SIZE;
+	port->ops = &ar933x_uart_ops;
+
+	ar933x_uart_add_console_port(up);
+
+	ret = uart_add_one_port(&ar933x_uart_driver, &up->port);
+	if (ret)
+		goto err_unmap;
+
+	platform_set_drvdata(pdev, up);
+	return 0;
+
+err_unmap:
+	iounmap(up->port.membase);
+err_free_up:
+	kfree(up);
+	return ret;
+}
+
+static int __devexit ar933x_uart_remove(struct platform_device *pdev)
+{
+	struct ar933x_uart_port *up;
+
+	up = platform_get_drvdata(pdev);
+	platform_set_drvdata(pdev, NULL);
+
+	if (up) {
+		uart_remove_one_port(&ar933x_uart_driver, &up->port);
+		iounmap(up->port.membase);
+		kfree(up);
+	}
+
+	return 0;
+}
+
+static struct platform_driver ar933x_uart_platform_driver = {
+	.probe		= ar933x_uart_probe,
+	.remove		= __devexit_p(ar933x_uart_remove),
+	.driver		= {
+		.name		= DRIVER_NAME,
+		.owner		= THIS_MODULE,
+	},
+};
+
+static int __init ar933x_uart_init(void)
+{
+	int ret;
+
+	ar933x_uart_driver.nr = CONFIG_SERIAL_AR933X_NR_UARTS;
+	ret = uart_register_driver(&ar933x_uart_driver);
+	if (ret)
+		goto err_out;
+
+	ret = platform_driver_register(&ar933x_uart_platform_driver);
+	if (ret)
+		goto err_unregister_uart_driver;
+
+	return 0;
+
+err_unregister_uart_driver:
+	uart_unregister_driver(&ar933x_uart_driver);
+err_out:
+	return ret;
+}
+
+static void __exit ar933x_uart_exit(void)
+{
+	platform_driver_unregister(&ar933x_uart_platform_driver);
+	uart_unregister_driver(&ar933x_uart_driver);
+}
+
+module_init(ar933x_uart_init);
+module_exit(ar933x_uart_exit);
+
+MODULE_DESCRIPTION("Atheros AR933X UART driver");
+MODULE_AUTHOR("Gabor Juhos <juhosg@openwrt.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index eadf33d0abba..3c35fb2f688f 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -207,6 +207,10 @@
 /* Xilinx PSS UART */
 #define PORT_XUARTPS	98
 
+/* Atheros AR933X SoC */
+#define PORT_AR933X	99
+
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
-- 
cgit v1.2.3


From eda2030a5b60bb818f062adacbcfb6fd2d366fb9 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Thu, 8 Dec 2011 22:58:54 +0900
Subject: sh: extend clock struct with mapped_reg member

Add a "mapped_reg" member to struct clk and use that
to keep the ioremapped register based on enable_reg.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/clk/core.c  | 9 +++++++--
 include/linux/sh_clk.h | 1 +
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/clk/core.c b/drivers/sh/clk/core.c
index db257a35e71a..7715de2629c1 100644
--- a/drivers/sh/clk/core.c
+++ b/drivers/sh/clk/core.c
@@ -355,7 +355,7 @@ static int clk_establish_mapping(struct clk *clk)
 		 */
 		if (!clk->parent) {
 			clk->mapping = &dummy_mapping;
-			return 0;
+			goto out;
 		}
 
 		/*
@@ -384,6 +384,9 @@ static int clk_establish_mapping(struct clk *clk)
 	}
 
 	clk->mapping = mapping;
+out:
+	clk->mapped_reg = clk->mapping->base;
+	clk->mapped_reg += (phys_addr_t)clk->enable_reg - clk->mapping->phys;
 	return 0;
 }
 
@@ -402,10 +405,12 @@ static void clk_teardown_mapping(struct clk *clk)
 
 	/* Nothing to do */
 	if (mapping == &dummy_mapping)
-		return;
+		goto out;
 
 	kref_put(&mapping->ref, clk_destroy_mapping);
 	clk->mapping = NULL;
+out:
+	clk->mapped_reg = NULL;
 }
 
 int clk_register(struct clk *clk)
diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h
index e834304c0b6a..54341d811685 100644
--- a/include/linux/sh_clk.h
+++ b/include/linux/sh_clk.h
@@ -49,6 +49,7 @@ struct clk {
 
 	void __iomem		*enable_reg;
 	unsigned int		enable_bit;
+	void __iomem		*mapped_reg;
 
 	unsigned long		arch_flags;
 	void			*priv;
-- 
cgit v1.2.3


From b0e10211cba1629e2e534ca9cb3d87cfc7e389ea Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Fri, 9 Dec 2011 12:14:27 +0900
Subject: sh: pfc: ioremap() support

Add support for non-identity mapped PFC registers through
the use of struct resource and ioremap()/iounmap().

The PFC main data structure gets updated with a pointer
to a struct resource array that points out all register
windows used by the PFC instance. The register definitions
are kept as physical addresses but the PFC code will do
transparent conversion into virtual addresses whenever
register windows are specified using struct resource.

To introduce as little performance penalty as possible the
virtual address of each data register is cached in memory.
The virtual address of each configuration register is however
calculated at run time. This is because configuration
is considered slow path, so focus is instead put on keeping
the memory footprint as small as possible.

In this patch the PFC register access code is updated from
__raw_readN() / __raw_writeN() to ioreadN() / iowriteN().

This patch is needed to support the PFC block in r8a7779.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/pfc.c       | 137 ++++++++++++++++++++++++++++++++++++++++---------
 include/linux/sh_pfc.h |  11 ++++
 2 files changed, 124 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/pfc.c b/drivers/sh/pfc.c
index e67fe170d8d5..e7d127a9c1c5 100644
--- a/drivers/sh/pfc.c
+++ b/drivers/sh/pfc.c
@@ -19,6 +19,75 @@
 #include <linux/irq.h>
 #include <linux/bitops.h>
 #include <linux/gpio.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+
+static void pfc_iounmap(struct pinmux_info *pip)
+{
+	int k;
+
+	for (k = 0; k < pip->num_resources; k++)
+		if (pip->window[k].virt)
+			iounmap(pip->window[k].virt);
+
+	kfree(pip->window);
+	pip->window = NULL;
+}
+
+static int pfc_ioremap(struct pinmux_info *pip)
+{
+	struct resource *res;
+	int k;
+
+	if (!pip->num_resources)
+		return 0;
+	/* zeroed array, so pfc_iounmap() can skip windows not yet mapped */
+	pip->window = kcalloc(pip->num_resources, sizeof(*pip->window),
+			      GFP_NOWAIT);
+	if (!pip->window)
+		goto err1;
+
+	for (k = 0; k < pip->num_resources; k++) {
+		res = pip->resource + k;
+		WARN_ON(resource_type(res) != IORESOURCE_MEM);
+		pip->window[k].phys = res->start;
+		pip->window[k].size = resource_size(res);
+		pip->window[k].virt = ioremap_nocache(res->start,
+							 resource_size(res));
+		if (!pip->window[k].virt)
+			goto err2;
+	}
+
+	return 0;
+
+err2:
+	pfc_iounmap(pip);
+err1:
+	return -ENOMEM;
+}
+
+static void __iomem *pfc_phys_to_virt(struct pinmux_info *pip,
+				      unsigned long address)
+{
+	struct pfc_window *window;
+	int k;
+
+	/* find the window containing 'address' and translate into it */
+	for (k = 0; k < pip->num_resources; k++) {
+		window = pip->window + k;
+
+		if (address < window->phys)
+			continue;
+
+		if (address >= (window->phys + window->size))
+			continue;
+
+		return window->virt + (address - window->phys);
+	}
+
+	/* no matching window: register must be 1:1 mapped virt:phys */
+	return (void __iomem *)address;
+}
 
 static int enum_in_range(pinmux_enum_t enum_id, struct pinmux_range *r)
 {
@@ -31,35 +100,35 @@ static int enum_in_range(pinmux_enum_t enum_id, struct pinmux_range *r)
 	return 1;
 }
 
-static unsigned long gpio_read_raw_reg(unsigned long reg,
+static unsigned long gpio_read_raw_reg(void __iomem *mapped_reg,
 				       unsigned long reg_width)
 {
 	switch (reg_width) {
 	case 8:
-		return __raw_readb(reg);
+		return ioread8(mapped_reg);
 	case 16:
-		return __raw_readw(reg);
+		return ioread16(mapped_reg);
 	case 32:
-		return __raw_readl(reg);
+		return ioread32(mapped_reg);
 	}
 
 	BUG();
 	return 0;
 }
 
-static void gpio_write_raw_reg(unsigned long reg,
+static void gpio_write_raw_reg(void __iomem *mapped_reg,
 			       unsigned long reg_width,
 			       unsigned long data)
 {
 	switch (reg_width) {
 	case 8:
-		__raw_writeb(data, reg);
+		iowrite8(data, mapped_reg);
 		return;
 	case 16:
-		__raw_writew(data, reg);
+		iowrite16(data, mapped_reg);
 		return;
 	case 32:
-		__raw_writel(data, reg);
+		iowrite32(data, mapped_reg);
 		return;
 	}
 
@@ -82,11 +151,12 @@ static void gpio_write_bit(struct pinmux_data_reg *dr,
 	else
 		clear_bit(pos, &dr->reg_shadow);
 
-	gpio_write_raw_reg(dr->reg, dr->reg_width, dr->reg_shadow);
+	gpio_write_raw_reg(dr->mapped_reg, dr->reg_width, dr->reg_shadow);
 }
 
-static int gpio_read_reg(unsigned long reg, unsigned long reg_width,
-			 unsigned long field_width, unsigned long in_pos)
+static int gpio_read_reg(void __iomem *mapped_reg, unsigned long reg_width,
+			 unsigned long field_width, unsigned long in_pos,
+			 unsigned long reg)
 {
 	unsigned long data, mask, pos;
 
@@ -98,13 +168,13 @@ static int gpio_read_reg(unsigned long reg, unsigned long reg_width,
 		 "r_width = %ld, f_width = %ld\n",
 		 reg, pos, reg_width, field_width);
 
-	data = gpio_read_raw_reg(reg, reg_width);
+	data = gpio_read_raw_reg(mapped_reg, reg_width);
 	return (data >> pos) & mask;
 }
 
-static void gpio_write_reg(unsigned long reg, unsigned long reg_width,
+static void gpio_write_reg(void __iomem *mapped_reg, unsigned long reg_width,
 			   unsigned long field_width, unsigned long in_pos,
-			   unsigned long value)
+			   unsigned long value, unsigned long reg)
 {
 	unsigned long mask, pos;
 
@@ -120,13 +190,13 @@ static void gpio_write_reg(unsigned long reg, unsigned long reg_width,
 
 	switch (reg_width) {
 	case 8:
-		__raw_writeb((__raw_readb(reg) & mask) | value, reg);
+		iowrite8((ioread8(mapped_reg) & mask) | value, mapped_reg);
 		break;
 	case 16:
-		__raw_writew((__raw_readw(reg) & mask) | value, reg);
+		iowrite16((ioread16(mapped_reg) & mask) | value, mapped_reg);
 		break;
 	case 32:
-		__raw_writel((__raw_readl(reg) & mask) | value, reg);
+		iowrite32((ioread32(mapped_reg) & mask) | value, mapped_reg);
 		break;
 	}
 }
@@ -147,6 +217,8 @@ static int setup_data_reg(struct pinmux_info *gpioc, unsigned gpio)
 		if (!data_reg->reg_width)
 			break;
 
+		data_reg->mapped_reg = pfc_phys_to_virt(gpioc, data_reg->reg);
+
 		for (n = 0; n < data_reg->reg_width; n++) {
 			if (data_reg->enum_ids[n] == gpiop->enum_id) {
 				gpiop->flags &= ~PINMUX_FLAG_DREG;
@@ -179,7 +251,8 @@ static void setup_data_regs(struct pinmux_info *gpioc)
 		if (!drp->reg_width)
 			break;
 
-		drp->reg_shadow = gpio_read_raw_reg(drp->reg, drp->reg_width);
+		drp->reg_shadow = gpio_read_raw_reg(drp->mapped_reg,
+						    drp->reg_width);
 		k++;
 	}
 }
@@ -266,12 +339,16 @@ static void write_config_reg(struct pinmux_info *gpioc,
 			     int index)
 {
 	unsigned long ncomb, pos, value;
+	void __iomem *mapped_reg;
 
 	ncomb = 1 << crp->field_width;
 	pos = index / ncomb;
 	value = index % ncomb;
 
-	gpio_write_reg(crp->reg, crp->reg_width, crp->field_width, pos, value);
+	mapped_reg = pfc_phys_to_virt(gpioc, crp->reg);
+
+	gpio_write_reg(mapped_reg, crp->reg_width, crp->field_width,
+		       pos, value, crp->reg);
 }
 
 static int check_config_reg(struct pinmux_info *gpioc,
@@ -279,13 +356,16 @@ static int check_config_reg(struct pinmux_info *gpioc,
 			    int index)
 {
 	unsigned long ncomb, pos, value;
+	void __iomem *mapped_reg;
 
 	ncomb = 1 << crp->field_width;
 	pos = index / ncomb;
 	value = index % ncomb;
 
-	if (gpio_read_reg(crp->reg, crp->reg_width,
-			  crp->field_width, pos) == value)
+	mapped_reg = pfc_phys_to_virt(gpioc, crp->reg);
+
+	if (gpio_read_reg(mapped_reg, crp->reg_width,
+			  crp->field_width, pos, crp->reg) == value)
 		return 0;
 
 	return -1;
@@ -564,7 +644,7 @@ static int sh_gpio_get_value(struct pinmux_info *gpioc, unsigned gpio)
 	if (!gpioc || get_data_reg(gpioc, gpio, &dr, &bit) != 0)
 		return -EINVAL;
 
-	return gpio_read_reg(dr->reg, dr->reg_width, 1, bit);
+	return gpio_read_reg(dr->mapped_reg, dr->reg_width, 1, bit, dr->reg);
 }
 
 static int sh_gpio_get(struct gpio_chip *chip, unsigned offset)
@@ -606,10 +686,15 @@ static int sh_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
 int register_pinmux(struct pinmux_info *pip)
 {
 	struct gpio_chip *chip = &pip->chip;
+	int ret;
 
 	pr_info("%s handling gpio %d -> %d\n",
 		pip->name, pip->first_gpio, pip->last_gpio);
 
+	ret = pfc_ioremap(pip);
+	if (ret < 0)
+		return ret;
+
 	setup_data_regs(pip);
 
 	chip->request = sh_gpio_request;
@@ -627,12 +712,16 @@ int register_pinmux(struct pinmux_info *pip)
 	chip->base = pip->first_gpio;
 	chip->ngpio = (pip->last_gpio - pip->first_gpio) + 1;
 
-	return gpiochip_add(chip);
+	ret = gpiochip_add(chip);
+	if (ret < 0)
+		pfc_iounmap(pip);
+
+	return ret;
 }
 
 int unregister_pinmux(struct pinmux_info *pip)
 {
 	pr_info("%s deregistering\n", pip->name);
-
+	pfc_iounmap(pip);
 	return gpiochip_remove(&pip->chip);
 }
diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
index 8446789216e5..91666a58529d 100644
--- a/include/linux/sh_pfc.h
+++ b/include/linux/sh_pfc.h
@@ -55,6 +55,7 @@ struct pinmux_cfg_reg {
 struct pinmux_data_reg {
 	unsigned long reg, reg_width, reg_shadow;
 	pinmux_enum_t *enum_ids;
+	void __iomem *mapped_reg;
 };
 
 #define PINMUX_DATA_REG(name, r, r_width) \
@@ -75,6 +76,12 @@ struct pinmux_range {
 	pinmux_enum_t force;
 };
 
+struct pfc_window {
+	phys_addr_t phys;
+	void __iomem *virt;
+	unsigned long size;
+};
+
 struct pinmux_info {
 	char *name;
 	pinmux_enum_t reserved_id;
@@ -98,6 +105,10 @@ struct pinmux_info {
 	struct pinmux_irq *gpio_irq;
 	unsigned int gpio_irq_size;
 
+	struct resource *resource;
+	unsigned int num_resources;
+	struct pfc_window *window;
+
 	struct gpio_chip chip;
 };
 
-- 
cgit v1.2.3


From 25a0bc2dfc2ea732f40af2dae52426ead66ae76e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 7 Dec 2011 11:24:20 -0800
Subject: power_supply: add SCOPE attribute to power supplies

This adds a "scope" attribute to a power_supply, which indicates how
much of the system it powers.  It appears in sysfs as "scope" or in
the uevent file as POWER_SUPPLY_SCOPE=.  There are presently three
possible values:
	Unknown - unknown power topology
	System - the power supply powers the whole system
	Device - it powers a specific device, or tree of devices

A power supply which doesn't have a "scope" attribute should be assumed to
have "System" scope.

In general, usermode should assume that loss of all System-scoped power
supplies will power off the whole system, but any single one is sufficient
to power the system.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Richard Hughes <richard@hughsie.com>
---
 drivers/power/power_supply_sysfs.c | 6 ++++++
 include/linux/power_supply.h       | 7 +++++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index e15d4c9d3988..21178ebfe51a 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -63,6 +63,9 @@ static ssize_t power_supply_show_property(struct device *dev,
 	static char *capacity_level_text[] = {
 		"Unknown", "Critical", "Low", "Normal", "High", "Full"
 	};
+	static char *scope_text[] = {
+		"Unknown", "System", "Device"
+	};
 	ssize_t ret = 0;
 	struct power_supply *psy = dev_get_drvdata(dev);
 	const ptrdiff_t off = attr - power_supply_attrs;
@@ -95,6 +98,8 @@ static ssize_t power_supply_show_property(struct device *dev,
 		return sprintf(buf, "%s\n", capacity_level_text[value.intval]);
 	else if (off == POWER_SUPPLY_PROP_TYPE)
 		return sprintf(buf, "%s\n", type_text[value.intval]);
+	else if (off == POWER_SUPPLY_PROP_SCOPE)
+		return sprintf(buf, "%s\n", scope_text[value.intval]);
 	else if (off >= POWER_SUPPLY_PROP_MODEL_NAME)
 		return sprintf(buf, "%s\n", value.strval);
 
@@ -167,6 +172,7 @@ static struct device_attribute power_supply_attrs[] = {
 	POWER_SUPPLY_ATTR(time_to_full_now),
 	POWER_SUPPLY_ATTR(time_to_full_avg),
 	POWER_SUPPLY_ATTR(type),
+	POWER_SUPPLY_ATTR(scope),
 	/* Properties of type `const char *' */
 	POWER_SUPPLY_ATTR(model_name),
 	POWER_SUPPLY_ATTR(manufacturer),
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 204c18dfdc9e..040a7b08e7c7 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -74,6 +74,12 @@ enum {
 	POWER_SUPPLY_CAPACITY_LEVEL_FULL,
 };
 
+enum {
+	POWER_SUPPLY_SCOPE_UNKNOWN = 0,
+	POWER_SUPPLY_SCOPE_SYSTEM,
+	POWER_SUPPLY_SCOPE_DEVICE,
+};
+
 enum power_supply_property {
 	/* Properties of type `int' */
 	POWER_SUPPLY_PROP_STATUS = 0,
@@ -116,6 +122,7 @@ enum power_supply_property {
 	POWER_SUPPLY_PROP_TIME_TO_FULL_NOW,
 	POWER_SUPPLY_PROP_TIME_TO_FULL_AVG,
 	POWER_SUPPLY_PROP_TYPE, /* use power_supply.type instead */
+	POWER_SUPPLY_PROP_SCOPE,
 	/* Properties of type `const char *' */
 	POWER_SUPPLY_PROP_MODEL_NAME,
 	POWER_SUPPLY_PROP_MANUFACTURER,
-- 
cgit v1.2.3


From 8351665195cec6d2b73cce8b66f02d6dde246a8e Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 7 Dec 2011 09:15:45 -0800
Subject: power_supply: allow a power supply to explicitly point to powered
 device

If a power supply has a scope of "Device", then allow the power supply
to indicate what device it actually powers. This is represented in the
power supply's sysfs directory as a symlink named "powers", which points to
the sysfs directory of the powered device.

If the device has children, then the sub-devices are also powered by
the same power supply.

Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Richard Hughes <richard@hughsie.com>
---
 drivers/power/power_supply_core.c | 7 +++++++
 include/linux/power_supply.h      | 1 +
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 329b46b2327d..b10c121244e5 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -147,6 +147,12 @@ struct power_supply *power_supply_get_by_name(char *name)
 }
 EXPORT_SYMBOL_GPL(power_supply_get_by_name);
 
+int power_supply_powers(struct power_supply *psy, struct device *dev)
+{
+	return sysfs_create_link_nowarn(&psy->dev->kobj, &dev->kobj, "powers");
+}
+EXPORT_SYMBOL_GPL(power_supply_powers);
+
 static void power_supply_dev_release(struct device *dev)
 {
 	pr_debug("device: '%s': %s\n", dev_name(dev), __func__);
@@ -202,6 +208,7 @@ EXPORT_SYMBOL_GPL(power_supply_register);
 void power_supply_unregister(struct power_supply *psy)
 {
 	cancel_work_sync(&psy->changed_work);
+	sysfs_remove_link(&psy->dev->kobj, "powers");
 	power_supply_remove_triggers(psy);
 	device_unregister(psy->dev);
 }
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 040a7b08e7c7..2e3c8279b3b0 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -218,6 +218,7 @@ static inline int power_supply_is_system_supplied(void) { return -ENOSYS; }
 extern int power_supply_register(struct device *parent,
 				 struct power_supply *psy);
 extern void power_supply_unregister(struct power_supply *psy);
+extern int power_supply_powers(struct power_supply *psy, struct device *dev);
 
 /* For APM emulation, think legacy userspace. */
 extern struct class *power_supply_class;
-- 
cgit v1.2.3


From f30ca6ba0bb2b7d050f24682bb8639c939c79859 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Tue, 15 Nov 2011 12:56:32 +0000
Subject: efi.h: Add struct definition for boot time services

With the forthcoming efi stub code we're gonna need to access boot
time services so let's define a struct so we can access the functions.

Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Link: http://lkml.kernel.org/r/1318848017-12301-1-git-send-email-matt@console-pimps.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/efi.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 2362a0bc7f0d..9547597ad6be 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -138,6 +138,57 @@ typedef struct {
 	u8 sets_to_zero;
 } efi_time_cap_t;
 
+/*
+ * EFI Boot Services table
+ */
+typedef struct {
+	efi_table_hdr_t hdr;
+	void *raise_tpl;
+	void *restore_tpl;
+	void *allocate_pages;
+	void *free_pages;
+	void *get_memory_map;
+	void *allocate_pool;
+	void *free_pool;
+	void *create_event;
+	void *set_timer;
+	void *wait_for_event;
+	void *signal_event;
+	void *close_event;
+	void *check_event;
+	void *install_protocol_interface;
+	void *reinstall_protocol_interface;
+	void *uninstall_protocol_interface;
+	void *handle_protocol;
+	void *__reserved;
+	void *register_protocol_notify;
+	void *locate_handle;
+	void *locate_device_path;
+	void *install_configuration_table;
+	void *load_image;
+	void *start_image;
+	void *exit;
+	void *unload_image;
+	void *exit_boot_services;
+	void *get_next_monotonic_count;
+	void *stall;
+	void *set_watchdog_timer;
+	void *connect_controller;
+	void *disconnect_controller;
+	void *open_protocol;
+	void *close_protocol;
+	void *open_protocol_information;
+	void *protocols_per_handle;
+	void *locate_handle_buffer;
+	void *locate_protocol;
+	void *install_multiple_protocol_interfaces;
+	void *uninstall_multiple_protocol_interfaces;
+	void *calculate_crc32;
+	void *copy_mem;
+	void *set_mem;
+	void *create_event_ex;
+} efi_boot_services_t;
+
 /*
  * Types and defines for EFI ResetSystem
  */
@@ -261,7 +312,7 @@ typedef struct {
 	unsigned long stderr_handle;
 	unsigned long stderr;
 	efi_runtime_services_t *runtime;
-	unsigned long boottime;
+	efi_boot_services_t *boottime;
 	unsigned long nr_tables;
 	unsigned long tables;
 } efi_system_table_t;
-- 
cgit v1.2.3


From 8e84f345e2f2189a37492c77c566c7494b7b6b23 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Tue, 15 Nov 2011 12:56:50 +0000
Subject: efi.h: Add efi_image_loaded_t

Add the EFI loaded image structure and protocol guid which are
required by the x86 EFI boot stub. The EFI boot stub uses the
structure to figure out where it was loaded in memory and to pass
command line arguments to the kernel.

Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Link: http://lkml.kernel.org/r/1318848017-12301-1-git-send-email-matt@console-pimps.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/efi.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 9547597ad6be..e35005f451db 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -287,6 +287,9 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules,
 #define LINUX_EFI_CRASH_GUID \
     EFI_GUID(  0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0 )
 
+#define LOADED_IMAGE_PROTOCOL_GUID \
+    EFI_GUID(  0x5b1b31a1, 0x9562, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b )
+
 typedef struct {
 	efi_guid_t guid;
 	unsigned long table;
@@ -326,6 +329,22 @@ struct efi_memory_map {
 	unsigned long desc_size;
 };
 
+typedef struct {
+	u32 revision;
+	void *parent_handle;
+	efi_system_table_t *system_table;
+	void *device_handle;
+	void *file_path;
+	void *reserved;
+	u32 load_options_size;
+	void *load_options;
+	void *image_base;
+	__aligned_u64 image_size;
+	unsigned int image_code_type;
+	unsigned int image_data_type;
+	unsigned long unload;
+} efi_loaded_image_t;
+
 #define EFI_INVALID_TABLE_ADDR		(~0UL)
 
 /*
-- 
cgit v1.2.3


From bb05e4ba452ada7966fbced4e829aa029f546445 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Tue, 15 Nov 2011 12:57:03 +0000
Subject: efi.h: Add allocation types for boottime->allocate_pages()

Add the allocation types detailed in section 6.2 - "AllocatePages()"
of the UEFI 2.3 specification. These definitions will be used by the
x86 EFI boot stub which needs to allocate memory during boot.

Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Link: http://lkml.kernel.org/r/1318848017-12301-1-git-send-email-matt@console-pimps.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/efi.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index e35005f451db..378f2cd1f7c3 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -109,6 +109,14 @@ typedef struct {
 	u32 imagesize;
 } efi_capsule_header_t;
 
+/*
+ * Allocation types for calls to boottime->allocate_pages.
+ */
+#define EFI_ALLOCATE_ANY_PAGES		0
+#define EFI_ALLOCATE_MAX_ADDRESS	1
+#define EFI_ALLOCATE_ADDRESS		2
+#define EFI_MAX_ALLOCATE_TYPE		3
+
 typedef int (*efi_freemem_callback_t) (u64 start, u64 end, void *arg);
 
 /*
-- 
cgit v1.2.3


From 0f7c5d477f2ce552997831d80e2c872cca1b9054 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Tue, 15 Nov 2011 12:57:16 +0000
Subject: efi.h: Add graphics protocol guids

The x86 EFI boot stub uses the Graphics Output Protocol and Universal
Graphics Adapter (UGA) protocol guids when initialising graphics
during boot.

Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Link: http://lkml.kernel.org/r/1318848017-12301-1-git-send-email-matt@console-pimps.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/efi.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 378f2cd1f7c3..e46d771f87e5 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -298,6 +298,15 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules,
 #define LOADED_IMAGE_PROTOCOL_GUID \
     EFI_GUID(  0x5b1b31a1, 0x9562, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b )
 
+#define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID \
+    EFI_GUID(  0x9042a9de, 0x23dc, 0x4a38, 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a )
+
+#define EFI_UGA_PROTOCOL_GUID \
+    EFI_GUID(  0x982c298b, 0xf4fa, 0x41cb, 0xb8, 0x38, 0x77, 0xaa, 0x68, 0x8f, 0xb8, 0x39 )
+
+#define EFI_PCI_IO_PROTOCOL_GUID \
+    EFI_GUID(  0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x2, 0x9a )
+
 typedef struct {
 	efi_guid_t guid;
 	unsigned long table;
-- 
cgit v1.2.3


From e2527a7cbec073b69a251193f200a88efbced7ad Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Tue, 15 Nov 2011 12:57:26 +0000
Subject: efi.h: Add boottime->locate_handle search types

The x86 EFI boot stub needs to locate handles for various protocols.

Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Link: http://lkml.kernel.org/r/1318848017-12301-1-git-send-email-matt@console-pimps.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/efi.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index e46d771f87e5..d407c88f955f 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -471,6 +471,13 @@ extern int __init efi_setup_pcdp_console(char *);
 #define EFI_VARIABLE_BOOTSERVICE_ACCESS 0x0000000000000002
 #define EFI_VARIABLE_RUNTIME_ACCESS     0x0000000000000004
 
+/*
+ * The type of search to perform when calling boottime->locate_handle
+ */
+#define EFI_LOCATE_ALL_HANDLES			0
+#define EFI_LOCATE_BY_REGISTER_NOTIFY		1
+#define EFI_LOCATE_BY_PROTOCOL			2
+
 /*
  * EFI Device Path information
  */
-- 
cgit v1.2.3


From 55839d515495e766605d7aaabd9c2758370a8d27 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Thu, 11 Aug 2011 10:28:06 +0100
Subject: efi: Add EFI file I/O data types

The x86 EFI stub needs to access files, for example when loading
initrds. Add the required data types.

Cc: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Link: http://lkml.kernel.org/r/1318848017-12301-1-git-send-email-matt@console-pimps.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/efi.h | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index d407c88f955f..37c300712e02 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -307,6 +307,12 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules,
 #define EFI_PCI_IO_PROTOCOL_GUID \
     EFI_GUID(  0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x2, 0x9a )
 
+#define EFI_FILE_INFO_ID \
+    EFI_GUID(  0x9576e92, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b )
+
+#define EFI_FILE_SYSTEM_GUID \
+    EFI_GUID(  0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b )
+
 typedef struct {
 	efi_guid_t guid;
 	unsigned long table;
@@ -362,6 +368,40 @@ typedef struct {
 	unsigned long unload;
 } efi_loaded_image_t;
 
+typedef struct {
+	u64 revision;
+	void *open_volume;
+} efi_file_io_interface_t;
+
+typedef struct {
+	u64 size;
+	u64 file_size;
+	u64 phys_size;
+	efi_time_t create_time;
+	efi_time_t last_access_time;
+	efi_time_t modification_time;
+	__aligned_u64 attribute;
+	efi_char16_t filename[1];
+} efi_file_info_t;
+
+typedef struct {
+	u64 revision;
+	void *open;
+	void *close;
+	void *delete;
+	void *read;
+	void *write;
+	void *get_position;
+	void *set_position;
+	void *get_info;
+	void *set_info;
+	void *flush;
+} efi_file_handle_t;
+
+#define EFI_FILE_MODE_READ	0x0000000000000001
+#define EFI_FILE_MODE_WRITE	0x0000000000000002
+#define EFI_FILE_MODE_CREATE	0x8000000000000000
+
 #define EFI_INVALID_TABLE_ADDR		(~0UL)
 
 /*
-- 
cgit v1.2.3


From ff803ed4ddbbf9f4bbd439b5e23dc25a4e0cce7a Mon Sep 17 00:00:00 2001
From: Courtney Cavin <courtney.cavin@sonyericsson.com>
Date: Sun, 11 Dec 2011 23:38:27 -0800
Subject: Input: add driver for Sharp gp2ap002a00f proximity sensor

This driver adds support for Sharp's GP2AP002A00F proximity sensor. The
proximity is measured as a binary switch, i.e. an object is either
detected or not detected. Hence, this driver is implemented as a switch
that reports SW_FRONT_PROXIMITY.

Reviewed-by: Datta Shubhrajyoti <shubhrajyoti@ti.com>
Signed-off-by: Courtney Cavin <courtney.cavin@sonyericsson.com>
Signed-off-by: Oskar Andero <oskar.andero@sonyericsson.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/misc/Kconfig         |  11 ++
 drivers/input/misc/Makefile        |   3 +-
 drivers/input/misc/gp2ap002a00f.c  | 299 +++++++++++++++++++++++++++++++++++++
 include/linux/input/gp2ap002a00f.h |  22 +++
 4 files changed, 334 insertions(+), 1 deletion(-)
 create mode 100644 drivers/input/misc/gp2ap002a00f.c
 create mode 100644 include/linux/input/gp2ap002a00f.h

(limited to 'include/linux')

diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index e53b443d1e33..7b46781c30c9 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -179,6 +179,17 @@ config INPUT_APANEL
 	 To compile this driver as a module, choose M here: the module will
 	 be called apanel.
 
+config INPUT_GP2A
+	tristate "Sharp GP2AP002A00F I2C Proximity/Opto sensor driver"
+	depends on I2C
+	depends on GENERIC_GPIO
+	help
+	  Say Y here if you have a Sharp GP2AP002A00F proximity/als combo-chip
+	  hooked to an I2C bus.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called gp2ap002a00f.
+
 config INPUT_GPIO_TILT_POLLED
 	tristate "Polled GPIO tilt switch"
 	depends on GENERIC_GPIO
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index 90070c1a4ad3..46671a875b91 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -22,8 +22,9 @@ obj-$(CONFIG_INPUT_CMA3000)		+= cma3000_d0x.o
 obj-$(CONFIG_INPUT_CMA3000_I2C)		+= cma3000_d0x_i2c.o
 obj-$(CONFIG_INPUT_COBALT_BTNS)		+= cobalt_btns.o
 obj-$(CONFIG_INPUT_DM355EVM)		+= dm355evm_keys.o
-obj-$(CONFIG_HP_SDC_RTC)		+= hp_sdc_rtc.o
+obj-$(CONFIG_INPUT_GP2A)		+= gp2ap002a00f.o
 obj-$(CONFIG_INPUT_GPIO_TILT_POLLED)	+= gpio_tilt_polled.o
+obj-$(CONFIG_HP_SDC_RTC)		+= hp_sdc_rtc.o
 obj-$(CONFIG_INPUT_IXP4XX_BEEPER)	+= ixp4xx-beeper.o
 obj-$(CONFIG_INPUT_KEYSPAN_REMOTE)	+= keyspan_remote.o
 obj-$(CONFIG_INPUT_KXTJ9)		+= kxtj9.o
diff --git a/drivers/input/misc/gp2ap002a00f.c b/drivers/input/misc/gp2ap002a00f.c
new file mode 100644
index 000000000000..71fba8c2fc66
--- /dev/null
+++ b/drivers/input/misc/gp2ap002a00f.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2011 Sony Ericsson Mobile Communications Inc.
+ *
+ * Author: Courtney Cavin <courtney.cavin@sonyericsson.com>
+ * Prepared for up-stream by: Oskar Andero <oskar.andero@sonyericsson.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/i2c.h>
+#include <linux/irq.h>
+#include <linux/slab.h>
+#include <linux/input.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+#include <linux/delay.h>
+#include <linux/input/gp2ap002a00f.h>
+
+struct gp2a_data {
+	struct input_dev *input;
+	const struct gp2a_platform_data *pdata;
+	struct i2c_client *i2c_client;
+};
+
+enum gp2a_addr {
+	GP2A_ADDR_PROX	= 0x0,
+	GP2A_ADDR_GAIN	= 0x1,
+	GP2A_ADDR_HYS	= 0x2,
+	GP2A_ADDR_CYCLE	= 0x3,
+	GP2A_ADDR_OPMOD	= 0x4,
+	GP2A_ADDR_CON	= 0x6
+};
+
+enum gp2a_controls {
+	/* Software Shutdown control: 0 = shutdown, 1 = normal operation */
+	GP2A_CTRL_SSD	= 0x01
+};
+
+static int gp2a_report(struct gp2a_data *dt)
+{
+	int vo = gpio_get_value(dt->pdata->vout_gpio);
+
+	input_report_switch(dt->input, SW_FRONT_PROXIMITY, !vo);
+	input_sync(dt->input);
+
+	return 0;
+}
+
+static irqreturn_t gp2a_irq(int irq, void *handle)
+{
+	struct gp2a_data *dt = handle;
+
+	gp2a_report(dt);
+
+	return IRQ_HANDLED;
+}
+
+static int gp2a_enable(struct gp2a_data *dt)
+{
+	return i2c_smbus_write_byte_data(dt->i2c_client, GP2A_ADDR_OPMOD,
+					 GP2A_CTRL_SSD);
+}
+
+static int gp2a_disable(struct gp2a_data *dt)
+{
+	return i2c_smbus_write_byte_data(dt->i2c_client, GP2A_ADDR_OPMOD,
+					 0x00);
+}
+
+static int gp2a_device_open(struct input_dev *dev)
+{
+	struct gp2a_data *dt = input_get_drvdata(dev);
+	int error;
+
+	error = gp2a_enable(dt);
+	if (error < 0) {
+		dev_err(&dt->i2c_client->dev,
+			"unable to activate, err %d\n", error);
+		return error;
+	}
+
+	gp2a_report(dt);
+
+	return 0;
+}
+
+static void gp2a_device_close(struct input_dev *dev)
+{
+	struct gp2a_data *dt = input_get_drvdata(dev);
+	int error;
+
+	error = gp2a_disable(dt);
+	if (error < 0)
+		dev_err(&dt->i2c_client->dev,
+			"unable to deactivate, err %d\n", error);
+}
+
+static int __devinit gp2a_initialize(struct gp2a_data *dt)
+{
+	int error;
+
+	error = i2c_smbus_write_byte_data(dt->i2c_client, GP2A_ADDR_GAIN,
+					  0x08);
+	if (error < 0)
+		return error;
+
+	error = i2c_smbus_write_byte_data(dt->i2c_client, GP2A_ADDR_HYS,
+					  0xc2);
+	if (error < 0)
+		return error;
+
+	error = i2c_smbus_write_byte_data(dt->i2c_client, GP2A_ADDR_CYCLE,
+					  0x04);
+	if (error < 0)
+		return error;
+
+	error = gp2a_disable(dt);
+
+	return error;
+}
+
+static int __devinit gp2a_probe(struct i2c_client *client,
+				const struct i2c_device_id *id)
+{
+	const struct gp2a_platform_data *pdata = client->dev.platform_data;
+	struct gp2a_data *dt;
+	int error;
+
+	if (!pdata)
+		return -EINVAL;
+
+	if (pdata->hw_setup) {
+		error = pdata->hw_setup(client);
+		if (error < 0)
+			return error;
+	}
+
+	error = gpio_request_one(pdata->vout_gpio, GPIOF_IN, GP2A_I2C_NAME);
+	if (error)
+		goto err_hw_shutdown;
+
+	dt = kzalloc(sizeof(struct gp2a_data), GFP_KERNEL);
+	if (!dt) {
+		error = -ENOMEM;
+		goto err_free_gpio;
+	}
+
+	dt->pdata = pdata;
+	dt->i2c_client = client;
+
+	error = gp2a_initialize(dt);
+	if (error < 0)
+		goto err_free_mem;
+
+	dt->input = input_allocate_device();
+	if (!dt->input) {
+		error = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	input_set_drvdata(dt->input, dt);
+
+	dt->input->open = gp2a_device_open;
+	dt->input->close = gp2a_device_close;
+	dt->input->name = GP2A_I2C_NAME;
+	dt->input->id.bustype = BUS_I2C;
+	dt->input->dev.parent = &client->dev;
+
+	input_set_capability(dt->input, EV_SW, SW_FRONT_PROXIMITY);
+
+	error = request_threaded_irq(client->irq, NULL, gp2a_irq,
+			IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING |
+				IRQF_ONESHOT,
+			GP2A_I2C_NAME, dt);
+	if (error) {
+		dev_err(&client->dev, "irq request failed\n");
+		goto err_free_input_dev;
+	}
+
+	error = input_register_device(dt->input);
+	if (error) {
+		dev_err(&client->dev, "device registration failed\n");
+		goto err_free_irq;
+	}
+
+	device_init_wakeup(&client->dev, pdata->wakeup);
+	i2c_set_clientdata(client, dt);
+
+	return 0;
+
+err_free_irq:
+	free_irq(client->irq, dt);
+err_free_input_dev:
+	input_free_device(dt->input);
+err_free_mem:
+	kfree(dt);
+err_free_gpio:
+	gpio_free(pdata->vout_gpio);
+err_hw_shutdown:
+	if (pdata->hw_shutdown)
+		pdata->hw_shutdown(client);
+	return error;
+}
+
+static int __devexit gp2a_remove(struct i2c_client *client)
+{
+	struct gp2a_data *dt = i2c_get_clientdata(client);
+	const struct gp2a_platform_data *pdata = dt->pdata;
+
+	device_init_wakeup(&client->dev, false);
+
+	free_irq(client->irq, dt);
+
+	input_unregister_device(dt->input);
+	kfree(dt);
+
+	gpio_free(pdata->vout_gpio);
+
+	if (pdata->hw_shutdown)
+		pdata->hw_shutdown(client);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int gp2a_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct gp2a_data *dt = i2c_get_clientdata(client);
+	int retval = 0;
+
+	if (device_may_wakeup(&client->dev)) {
+		enable_irq_wake(client->irq);
+	} else {
+		mutex_lock(&dt->input->mutex);
+		if (dt->input->users)
+			retval = gp2a_disable(dt);
+		mutex_unlock(&dt->input->mutex);
+	}
+
+	return retval;
+}
+
+static int gp2a_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct gp2a_data *dt = i2c_get_clientdata(client);
+	int retval = 0;
+
+	if (device_may_wakeup(&client->dev)) {
+		disable_irq_wake(client->irq);
+	} else {
+		mutex_lock(&dt->input->mutex);
+		if (dt->input->users)
+			retval = gp2a_enable(dt);
+		mutex_unlock(&dt->input->mutex);
+	}
+
+	return retval;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(gp2a_pm, gp2a_suspend, gp2a_resume);
+
+static const struct i2c_device_id gp2a_i2c_id[] = {
+	{ GP2A_I2C_NAME, 0 },
+	{ }
+};
+
+static struct i2c_driver gp2a_i2c_driver = {
+	.driver = {
+		.name	= GP2A_I2C_NAME,
+		.owner	= THIS_MODULE,
+		.pm	= &gp2a_pm,
+	},
+	.probe		= gp2a_probe,
+	.remove		= __devexit_p(gp2a_remove),
+	.id_table	= gp2a_i2c_id,
+};
+
+static int __init gp2a_init(void)
+{
+	return i2c_add_driver(&gp2a_i2c_driver);
+}
+
+static void __exit gp2a_exit(void)
+{
+	i2c_del_driver(&gp2a_i2c_driver);
+}
+
+module_init(gp2a_init);
+module_exit(gp2a_exit);
+
+MODULE_AUTHOR("Courtney Cavin <courtney.cavin@sonyericsson.com>");
+MODULE_DESCRIPTION("Sharp GP2AP002A00F I2C Proximity/Opto sensor driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/input/gp2ap002a00f.h b/include/linux/input/gp2ap002a00f.h
new file mode 100644
index 000000000000..aad2fd44a61a
--- /dev/null
+++ b/include/linux/input/gp2ap002a00f.h
@@ -0,0 +1,22 @@
+#ifndef _GP2AP002A00F_H_
+#define _GP2AP002A00F_H_
+
+#include <linux/i2c.h>
+
+#define GP2A_I2C_NAME "gp2ap002a00f"
+
+/**
+ * struct gp2a_platform_data - Sharp gp2ap002a00f proximity platform data
+ * @vout_gpio: The gpio connected to the object detected pin (VOUT)
+ * @wakeup: Set to true if the proximity can wake the device from suspend
+ * @hw_setup: Callback for setting up hardware such as gpios and vregs
+ * @hw_shutdown: Callback for properly shutting down hardware
+ */
+struct gp2a_platform_data {
+	int vout_gpio;
+	bool wakeup;
+	int (*hw_setup)(struct i2c_client *client);
+	int (*hw_shutdown)(struct i2c_client *client);
+};
+
+#endif
-- 
cgit v1.2.3


From 267aed9dfeb2225960043f89db1f58d8ab522797 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 12 Dec 2011 13:54:36 +0000
Subject: UAPI: elf_read_implies_exec() is a kernel-only feature - so hide from
 userspace

elf_read_implies_exec() is a kernel-only feature as the second parameter is a
constant that isn't exported to userspace.  Not only that, but the
arch-specific overrides are not exported either.

So hide the macro from userspace.

Similarly, struct file should not be predeclared in userspace.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/elf.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/elf.h b/include/linux/elf.h
index 31f0508d7da7..999b4f52e8e5 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -7,15 +7,6 @@
 #include <asm/elf.h>
 #endif
 
-struct file;
-
-#ifndef elf_read_implies_exec
-  /* Executables for which elf_read_implies_exec() returns TRUE will
-     have the READ_IMPLIES_EXEC personality flag set automatically.
-     Override in asm/elf.h as needed.  */
-# define elf_read_implies_exec(ex, have_pt_gnu_stack)	0
-#endif
-
 /* 32-bit ELF base types. */
 typedef __u32	Elf32_Addr;
 typedef __u16	Elf32_Half;
@@ -414,6 +405,13 @@ typedef struct elf64_note {
 } Elf64_Nhdr;
 
 #ifdef __KERNEL__
+#ifndef elf_read_implies_exec
+  /* Executables for which elf_read_implies_exec() returns TRUE will
+     have the READ_IMPLIES_EXEC personality flag set automatically.
+     Override in asm/elf.h as needed.  */
+# define elf_read_implies_exec(ex, have_pt_gnu_stack)	0
+#endif
+
 #if ELF_CLASS == ELFCLASS32
 
 extern Elf32_Dyn _DYNAMIC [];
@@ -437,6 +435,8 @@ extern Elf64_Dyn _DYNAMIC [];
 #endif
 
 /* Optional callbacks to write extra ELF notes. */
+struct file;
+
 #ifndef ARCH_HAVE_EXTRA_ELF_NOTES
 static inline int elf_coredump_extra_notes_size(void) { return 0; }
 static inline int elf_coredump_extra_notes_write(struct file *file,
-- 
cgit v1.2.3


From 6a113ddc03bcc32d3d440dce42b445868d5be093 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 1 Dec 2011 12:04:58 +0100
Subject: iommu/amd: Add device errata handling

Add infrastructure for errata-handling and handle two known
errata in the IOMMUv2 code.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu.c       | 57 ++++++++++++++++++++++++++++++++++++++---
 drivers/iommu/amd_iommu_types.h |  1 +
 include/linux/amd-iommu.h       | 18 +++++++++++++
 3 files changed, 73 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 71773d0fb769..e453bbd09445 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -172,6 +172,15 @@ static bool pci_iommuv2_capable(struct pci_dev *pdev)
 	return true;
 }
 
+static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
+{
+	struct iommu_dev_data *dev_data;
+
+	dev_data = get_dev_data(&pdev->dev);
+
+	return dev_data->errata & (1 << erratum) ? true : false;
+}
+
 /*
  * In this function the list of preallocated protection domains is traversed to
  * find the domain for a specific device
@@ -1934,9 +1943,33 @@ static void pdev_iommuv2_disable(struct pci_dev *pdev)
 	pci_disable_pasid(pdev);
 }
 
+/* FIXME: Change generic reset-function to do the same */
+static int pri_reset_while_enabled(struct pci_dev *pdev)
+{
+	u16 control;
+	int pos;
+
+	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	if (!pos)
+		return -EINVAL;
+
+	pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
+	control |= PCI_PRI_RESET;
+	pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
+
+	return 0;
+}
+
 static int pdev_iommuv2_enable(struct pci_dev *pdev)
 {
-	int ret;
+	bool reset_enable;
+	int reqs, ret;
+
+	/* FIXME: Hardcode number of outstanding requests for now */
+	reqs = 32;
+	if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE))
+		reqs = 1;
+	reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET);
 
 	/* Only allow access to user-accessible pages */
 	ret = pci_enable_pasid(pdev, 0);
@@ -1948,11 +1981,17 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev)
 	if (ret)
 		goto out_err;
 
-	/* FIXME: Hardcode number of outstanding requests for now */
-	ret = pci_enable_pri(pdev, 32);
+	/* Enable PRI */
+	ret = pci_enable_pri(pdev, reqs);
 	if (ret)
 		goto out_err;
 
+	if (reset_enable) {
+		ret = pri_reset_while_enabled(pdev);
+		if (ret)
+			goto out_err;
+	}
+
 	ret = pci_enable_ats(pdev, PAGE_SHIFT);
 	if (ret)
 		goto out_err;
@@ -3481,3 +3520,15 @@ struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
 	return domain->iommu_domain;
 }
 EXPORT_SYMBOL(amd_iommu_get_v2_domain);
+
+void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum)
+{
+	struct iommu_dev_data *dev_data;
+
+	if (!amd_iommu_v2_supported())
+		return;
+
+	dev_data = get_dev_data(&pdev->dev);
+	dev_data->errata |= (1 << erratum);
+}
+EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index c39988fbcbbb..6ad8b10b3130 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -404,6 +404,7 @@ struct iommu_dev_data {
 	} ats;				  /* ATS state */
 	bool pri_tlp;			  /* PASID TLB required for
 					     PPR completions */
+	u32 errata;			  /* Bitmap for errata to apply */
 };
 
 /*
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index a6863a2dec1f..4152c3073db4 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -26,6 +26,24 @@
 
 extern int amd_iommu_detect(void);
 
+
+/**
+ * amd_iommu_enable_device_erratum() - Enable erratum workaround for device
+ *				       in the IOMMUv2 driver
+ * @pdev: The PCI device the workaround is necessary for
+ * @erratum: The erratum workaround to enable
+ *
+ * Possible values for the erratum number are for now:
+ * - AMD_PRI_DEV_ERRATUM_ENABLE_RESET - Reset PRI capability when PRI
+ *					is enabled
+ * - AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE - Limit number of outstanding PRI
+ *					 requests to one
+ */
+#define AMD_PRI_DEV_ERRATUM_ENABLE_RESET		0
+#define AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE		1
+
+extern void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum);
+
 #else
 
 static inline int amd_iommu_detect(void) { return -ENODEV; }
-- 
cgit v1.2.3


From ed96f228ba9725edf69385bffdc19ee5bb0ec641 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 23 Nov 2011 17:30:39 +0100
Subject: iommu/amd: Implement device acquisition code for IOMMUv2

This patch adds the amd_iommu_init_device() and
amd_iommu_free_device() functions which make a device and
the IOMMU ready for IOMMUv2 usage.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu_v2.c | 210 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/amd-iommu.h    |  23 ++++-
 2 files changed, 232 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index a19e07d11d12..bfceed25c186 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -16,20 +16,230 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
+#include <linux/amd-iommu.h>
+#include <linux/mm_types.h>
 #include <linux/module.h>
+#include <linux/iommu.h>
+#include <linux/pci.h>
+#include <linux/gfp.h>
+
+#include "amd_iommu_proto.h"
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>");
 
+#define MAX_DEVICES		0x10000
+#define PRI_QUEUE_SIZE		512
+
+struct pri_queue {
+	atomic_t inflight;
+	bool finish;
+};
+
+struct pasid_state {
+	struct list_head list;			/* For global state-list */
+	atomic_t count;				/* Reference count */
+	struct task_struct *task;		/* Task bound to this PASID */
+	struct mm_struct *mm;			/* mm_struct for the faults */
+	struct pri_queue pri[PRI_QUEUE_SIZE];	/* PRI tag states */
+	struct device_state *device_state;	/* Link to our device_state */
+	int pasid;				/* PASID index */
+};
+
+struct device_state {
+	atomic_t count;
+	struct pci_dev *pdev;
+	struct pasid_state **states;
+	struct iommu_domain *domain;
+	int pasid_levels;
+	int max_pasids;
+	spinlock_t lock;
+};
+
+struct device_state **state_table;
+static spinlock_t state_lock;
+
+/* List and lock for all pasid_states */
+static LIST_HEAD(pasid_state_list);
+
+static u16 device_id(struct pci_dev *pdev)
+{
+	u16 devid;
+
+	devid = pdev->bus->number;
+	devid = (devid << 8) | pdev->devfn;
+
+	return devid;
+}
+
+static struct device_state *get_device_state(u16 devid)
+{
+	struct device_state *dev_state;
+	unsigned long flags;
+
+	spin_lock_irqsave(&state_lock, flags);
+	dev_state = state_table[devid];
+	if (dev_state != NULL)
+		atomic_inc(&dev_state->count);
+	spin_unlock_irqrestore(&state_lock, flags);
+
+	return dev_state;
+}
+
+static void free_device_state(struct device_state *dev_state)
+{
+	iommu_detach_device(dev_state->domain, &dev_state->pdev->dev);
+	iommu_domain_free(dev_state->domain);
+	kfree(dev_state);
+}
+
+static void put_device_state(struct device_state *dev_state)
+{
+	if (atomic_dec_and_test(&dev_state->count))
+		free_device_state(dev_state);
+}
+
+int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
+{
+	struct device_state *dev_state;
+	unsigned long flags;
+	int ret, tmp;
+	u16 devid;
+
+	might_sleep();
+
+	if (!amd_iommu_v2_supported())
+		return -ENODEV;
+
+	if (pasids <= 0 || pasids > (PASID_MASK + 1))
+		return -EINVAL;
+
+	devid = device_id(pdev);
+
+	dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
+	if (dev_state == NULL)
+		return -ENOMEM;
+
+	spin_lock_init(&dev_state->lock);
+	dev_state->pdev = pdev;
+
+	tmp = pasids;
+	for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
+		dev_state->pasid_levels += 1;
+
+	atomic_set(&dev_state->count, 1);
+	dev_state->max_pasids = pasids;
+
+	ret = -ENOMEM;
+	dev_state->states = (void *)get_zeroed_page(GFP_KERNEL);
+	if (dev_state->states == NULL)
+		goto out_free_dev_state;
+
+	dev_state->domain = iommu_domain_alloc(&pci_bus_type);
+	if (dev_state->domain == NULL)
+		goto out_free_states;
+
+	amd_iommu_domain_direct_map(dev_state->domain);
+
+	ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
+	if (ret)
+		goto out_free_domain;
+
+	ret = iommu_attach_device(dev_state->domain, &pdev->dev);
+	if (ret != 0)
+		goto out_free_domain;
+
+	spin_lock_irqsave(&state_lock, flags);
+
+	if (state_table[devid] != NULL) {
+		spin_unlock_irqrestore(&state_lock, flags);
+		ret = -EBUSY;
+		goto out_free_domain;
+	}
+
+	state_table[devid] = dev_state;
+
+	spin_unlock_irqrestore(&state_lock, flags);
+
+	return 0;
+
+out_free_domain:
+	iommu_domain_free(dev_state->domain);
+
+out_free_states:
+	free_page((unsigned long)dev_state->states);
+
+out_free_dev_state:
+	kfree(dev_state);
+
+	return ret;
+}
+EXPORT_SYMBOL(amd_iommu_init_device);
+
+void amd_iommu_free_device(struct pci_dev *pdev)
+{
+	struct device_state *dev_state;
+	unsigned long flags;
+	u16 devid;
+
+	if (!amd_iommu_v2_supported())
+		return;
+
+	devid = device_id(pdev);
+
+	spin_lock_irqsave(&state_lock, flags);
+
+	dev_state = state_table[devid];
+	if (dev_state == NULL) {
+		spin_unlock_irqrestore(&state_lock, flags);
+		return;
+	}
+
+	state_table[devid] = NULL;
+
+	spin_unlock_irqrestore(&state_lock, flags);
+
+	put_device_state(dev_state);
+}
+EXPORT_SYMBOL(amd_iommu_free_device);
+
 static int __init amd_iommu_v2_init(void)
 {
+	size_t state_table_size;
+
 	pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>");
 
+	spin_lock_init(&state_lock);
+
+	state_table_size = MAX_DEVICES * sizeof(struct device_state *);
+	state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					       get_order(state_table_size));
+	if (state_table == NULL)
+		return -ENOMEM;
+
 	return 0;
 }
 
 static void __exit amd_iommu_v2_exit(void)
 {
+	struct device_state *dev_state;
+	size_t state_table_size;
+	int i;
+
+	for (i = 0; i < MAX_DEVICES; ++i) {
+		dev_state = get_device_state(i);
+
+		if (dev_state == NULL)
+			continue;
+
+		WARN_ON_ONCE(1);
+
+		amd_iommu_free_device(dev_state->pdev);
+		put_device_state(dev_state);
+	}
+
+	state_table_size = MAX_DEVICES * sizeof(struct device_state *);
+	free_pages((unsigned long)state_table, get_order(state_table_size));
 }
 
 module_init(amd_iommu_v2_init);
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 4152c3073db4..e8c7a2ec86b3 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -20,10 +20,12 @@
 #ifndef _ASM_X86_AMD_IOMMU_H
 #define _ASM_X86_AMD_IOMMU_H
 
-#include <linux/irqreturn.h>
+#include <linux/types.h>
 
 #ifdef CONFIG_AMD_IOMMU
 
+struct pci_dev;
+
 extern int amd_iommu_detect(void);
 
 
@@ -33,6 +35,7 @@ extern int amd_iommu_detect(void);
  * @pdev: The PCI device the workaround is necessary for
  * @erratum: The erratum workaround to enable
  *
+ * The function needs to be called before amd_iommu_init_device().
  * Possible values for the erratum number are for now:
  * - AMD_PRI_DEV_ERRATUM_ENABLE_RESET - Reset PRI capability when PRI
  *					is enabled
@@ -44,6 +47,24 @@ extern int amd_iommu_detect(void);
 
 extern void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum);
 
+/**
+ * amd_iommu_init_device() - Init device for use with IOMMUv2 driver
+ * @pdev: The PCI device to initialize
+ * @pasids: Number of PASIDs to support for this device
+ *
+ * This function does all setup for the device pdev so that it can be
+ * used with IOMMUv2.
+ * Returns 0 on success or negative value on error.
+ */
+extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids);
+
+/**
+ * amd_iommu_free_device() - Free all IOMMUv2 related device resources
+ *			     and disable IOMMUv2 usage for this device
+ * @pdev: The PCI device to disable IOMMUv2 usage for
+ */
+extern void amd_iommu_free_device(struct pci_dev *pdev);
+
 #else
 
 static inline int amd_iommu_detect(void) { return -ENODEV; }
-- 
cgit v1.2.3


From 2d5503b624736abfe0e0bad281f9b8d8a705b930 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 24 Nov 2011 10:41:57 +0100
Subject: iommu/amd: Add routines to bind/unbind a pasid

This patch adds routines to bind a specific process
address-space to a given PASID.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu_v2.c | 306 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/amd-iommu.h    |  26 ++++
 2 files changed, 332 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index bfceed25c186..b5ee09ece651 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -19,6 +19,7 @@
 #include <linux/amd-iommu.h>
 #include <linux/mm_types.h>
 #include <linux/module.h>
+#include <linux/sched.h>
 #include <linux/iommu.h>
 #include <linux/pci.h>
 #include <linux/gfp.h>
@@ -61,6 +62,10 @@ static spinlock_t state_lock;
 
 /* List and lock for all pasid_states */
 static LIST_HEAD(pasid_state_list);
+static DEFINE_SPINLOCK(ps_lock);
+
+static void free_pasid_states(struct device_state *dev_state);
+static void unbind_pasid(struct device_state *dev_state, int pasid);
 
 static u16 device_id(struct pci_dev *pdev)
 {
@@ -88,8 +93,16 @@ static struct device_state *get_device_state(u16 devid)
 
 static void free_device_state(struct device_state *dev_state)
 {
+	/*
+	 * First detach device from domain - No more PRI requests will arrive
+	 * from that device after it is unbound from the IOMMUv2 domain.
+	 */
 	iommu_detach_device(dev_state->domain, &dev_state->pdev->dev);
+
+	/* Everything is down now, free the IOMMUv2 domain */
 	iommu_domain_free(dev_state->domain);
+
+	/* Finally get rid of the device-state */
 	kfree(dev_state);
 }
 
@@ -99,6 +112,296 @@ static void put_device_state(struct device_state *dev_state)
 		free_device_state(dev_state);
 }
 
+static void link_pasid_state(struct pasid_state *pasid_state)
+{
+	spin_lock(&ps_lock);
+	list_add_tail(&pasid_state->list, &pasid_state_list);
+	spin_unlock(&ps_lock);
+}
+
+static void __unlink_pasid_state(struct pasid_state *pasid_state)
+{
+	list_del(&pasid_state->list);
+}
+
+static void unlink_pasid_state(struct pasid_state *pasid_state)
+{
+	spin_lock(&ps_lock);
+	__unlink_pasid_state(pasid_state);
+	spin_unlock(&ps_lock);
+}
+
+/* Must be called under dev_state->lock */
+static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
+						  int pasid, bool alloc)
+{
+	struct pasid_state **root, **ptr;
+	int level, index;
+
+	level = dev_state->pasid_levels;
+	root  = dev_state->states;
+
+	while (true) {
+
+		index = (pasid >> (9 * level)) & 0x1ff;
+		ptr   = &root[index];
+
+		if (level == 0)
+			break;
+
+		if (*ptr == NULL) {
+			if (!alloc)
+				return NULL;
+
+			*ptr = (void *)get_zeroed_page(GFP_ATOMIC);
+			if (*ptr == NULL)
+				return NULL;
+		}
+
+		root   = (struct pasid_state **)*ptr;
+		level -= 1;
+	}
+
+	return ptr;
+}
+
+static int set_pasid_state(struct device_state *dev_state,
+			   struct pasid_state *pasid_state,
+			   int pasid)
+{
+	struct pasid_state **ptr;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&dev_state->lock, flags);
+	ptr = __get_pasid_state_ptr(dev_state, pasid, true);
+
+	ret = -ENOMEM;
+	if (ptr == NULL)
+		goto out_unlock;
+
+	ret = -ENOMEM;
+	if (*ptr != NULL)
+		goto out_unlock;
+
+	*ptr = pasid_state;
+
+	ret = 0;
+
+out_unlock:
+	spin_unlock_irqrestore(&dev_state->lock, flags);
+
+	return ret;
+}
+
+static void clear_pasid_state(struct device_state *dev_state, int pasid)
+{
+	struct pasid_state **ptr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_state->lock, flags);
+	ptr = __get_pasid_state_ptr(dev_state, pasid, true);
+
+	if (ptr == NULL)
+		goto out_unlock;
+
+	*ptr = NULL;
+
+out_unlock:
+	spin_unlock_irqrestore(&dev_state->lock, flags);
+}
+
+static struct pasid_state *get_pasid_state(struct device_state *dev_state,
+					   int pasid)
+{
+	struct pasid_state **ptr, *ret = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_state->lock, flags);
+	ptr = __get_pasid_state_ptr(dev_state, pasid, false);
+
+	if (ptr == NULL)
+		goto out_unlock;
+
+	ret = *ptr;
+	if (ret)
+		atomic_inc(&ret->count);
+
+out_unlock:
+	spin_unlock_irqrestore(&dev_state->lock, flags);
+
+	return ret;
+}
+
+static void free_pasid_state(struct pasid_state *pasid_state)
+{
+	kfree(pasid_state);
+}
+
+static void put_pasid_state(struct pasid_state *pasid_state)
+{
+	if (atomic_dec_and_test(&pasid_state->count)) {
+		put_device_state(pasid_state->device_state);
+		mmput(pasid_state->mm);
+		free_pasid_state(pasid_state);
+	}
+}
+
+static void unbind_pasid(struct device_state *dev_state, int pasid)
+{
+	struct pasid_state *pasid_state;
+
+	pasid_state = get_pasid_state(dev_state, pasid);
+	if (pasid_state == NULL)
+		return;
+
+	unlink_pasid_state(pasid_state);
+
+	amd_iommu_domain_clear_gcr3(dev_state->domain, pasid);
+	clear_pasid_state(dev_state, pasid);
+
+	put_pasid_state(pasid_state); /* Reference taken in this function */
+	put_pasid_state(pasid_state); /* Reference taken in bind() function */
+}
+
+static void free_pasid_states_level1(struct pasid_state **tbl)
+{
+	int i;
+
+	for (i = 0; i < 512; ++i) {
+		if (tbl[i] == NULL)
+			continue;
+
+		free_page((unsigned long)tbl[i]);
+	}
+}
+
+static void free_pasid_states_level2(struct pasid_state **tbl)
+{
+	struct pasid_state **ptr;
+	int i;
+
+	for (i = 0; i < 512; ++i) {
+		if (tbl[i] == NULL)
+			continue;
+
+		ptr = (struct pasid_state **)tbl[i];
+		free_pasid_states_level1(ptr);
+	}
+}
+
+static void free_pasid_states(struct device_state *dev_state)
+{
+	struct pasid_state *pasid_state;
+	int i;
+
+	for (i = 0; i < dev_state->max_pasids; ++i) {
+		pasid_state = get_pasid_state(dev_state, i);
+		if (pasid_state == NULL)
+			continue;
+
+		unbind_pasid(dev_state, i);
+		put_pasid_state(pasid_state);
+	}
+
+	if (dev_state->pasid_levels == 2)
+		free_pasid_states_level2(dev_state->states);
+	else if (dev_state->pasid_levels == 1)
+		free_pasid_states_level1(dev_state->states);
+	else if (dev_state->pasid_levels != 0)
+		BUG();
+
+	free_page((unsigned long)dev_state->states);
+}
+
+int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+			 struct task_struct *task)
+{
+	struct pasid_state *pasid_state;
+	struct device_state *dev_state;
+	u16 devid;
+	int ret;
+
+	might_sleep();
+
+	if (!amd_iommu_v2_supported())
+		return -ENODEV;
+
+	devid     = device_id(pdev);
+	dev_state = get_device_state(devid);
+
+	if (dev_state == NULL)
+		return -EINVAL;
+
+	ret = -EINVAL;
+	if (pasid < 0 || pasid >= dev_state->max_pasids)
+		goto out;
+
+	ret = -ENOMEM;
+	pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL);
+	if (pasid_state == NULL)
+		goto out;
+
+	atomic_set(&pasid_state->count, 1);
+	pasid_state->task         = task;
+	pasid_state->mm           = get_task_mm(task);
+	pasid_state->device_state = dev_state;
+	pasid_state->pasid        = pasid;
+
+	if (pasid_state->mm == NULL)
+		goto out_free;
+
+	ret = set_pasid_state(dev_state, pasid_state, pasid);
+	if (ret)
+		goto out_free;
+
+	ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
+					__pa(pasid_state->mm->pgd));
+	if (ret)
+		goto out_clear_state;
+
+	link_pasid_state(pasid_state);
+
+	return 0;
+
+out_clear_state:
+	clear_pasid_state(dev_state, pasid);
+
+out_free:
+	put_pasid_state(pasid_state);
+
+out:
+	put_device_state(dev_state);
+
+	return ret;
+}
+EXPORT_SYMBOL(amd_iommu_bind_pasid);
+
+void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
+{
+	struct device_state *dev_state;
+	u16 devid;
+
+	might_sleep();
+
+	if (!amd_iommu_v2_supported())
+		return;
+
+	devid = device_id(pdev);
+	dev_state = get_device_state(devid);
+	if (dev_state == NULL)
+		return;
+
+	if (pasid < 0 || pasid >= dev_state->max_pasids)
+		goto out;
+
+	unbind_pasid(dev_state, pasid);
+
+out:
+	put_device_state(dev_state);
+}
+EXPORT_SYMBOL(amd_iommu_unbind_pasid);
+
 int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
 {
 	struct device_state *dev_state;
@@ -199,6 +502,9 @@ void amd_iommu_free_device(struct pci_dev *pdev)
 
 	spin_unlock_irqrestore(&state_lock, flags);
 
+	/* Get rid of any remaining pasid states */
+	free_pasid_states(dev_state);
+
 	put_device_state(dev_state);
 }
 EXPORT_SYMBOL(amd_iommu_free_device);
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index e8c7a2ec86b3..23e21e15dfab 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -24,9 +24,13 @@
 
 #ifdef CONFIG_AMD_IOMMU
 
+struct task_struct;
 struct pci_dev;
 
 extern int amd_iommu_detect(void);
+extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+				struct task_struct *task);
+extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
 
 
 /**
@@ -65,6 +69,28 @@ extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids);
  */
 extern void amd_iommu_free_device(struct pci_dev *pdev);
 
+/**
+ * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device
+ * @pdev: The PCI device to bind the task to
+ * @pasid: The PASID on the device the task should be bound to
+ * @task: the task to bind
+ *
+ * The function returns 0 on success or a negative value on error.
+ */
+extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+				struct task_struct *task);
+
+/**
+ * amd_iommu_unbind_pasid() - Unbind a PASID from its task on
+ *			      a device
+ * @pdev: The device of the PASID
+ * @pasid: The PASID to unbind
+ *
+ * When this function returns the device is no longer using the PASID
+ * and the PASID is no longer bound to its task.
+ */
+extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
+
 #else
 
 static inline int amd_iommu_detect(void) { return -ENODEV; }
-- 
cgit v1.2.3


From 26c34c25e54b4a352596d88c6e44a239dab8e1c5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 3 Nov 2011 13:20:38 +0000
Subject: mfd: Disable more pulls on WM8994

Disable more pulls by default on WM8994 for a small current saving. Since
some designs do leave SPKMODE floating, provide platform data to allow that
pull-up to be left enabled.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8994-core.c        | 11 ++++++++---
 include/linux/mfd/wm8994/pdata.h |  6 ++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index a6846b04e156..c8956f2cd280 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -373,6 +373,7 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 	struct wm8994_pdata *pdata = wm8994->dev->platform_data;
 	const char *devname;
 	int ret, i;
+	int pulls = 0;
 
 	dev_set_drvdata(wm8994->dev, wm8994);
 
@@ -515,12 +516,16 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 		}
 
 		wm8994->ldo_ena_always_driven = pdata->ldo_ena_always_driven;
+
+		if (pdata->spkmode_pu)
+			pulls |= WM8994_SPKMODE_PU;
 	}
 
-	/* Disable LDO pulldowns while the device is active */
+	/* Disable unneeded pulls */
 	wm8994_set_bits(wm8994, WM8994_PULL_CONTROL_2,
-			WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD,
-			0);
+			WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD |
+			WM8994_SPKMODE_PU | WM8994_CSNADDR_PD,
+			pulls);
 
 	/* In some system designs where the regulators are not in use,
 	 * we can achieve a small reduction in leakage currents by
diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index ea32f306dca6..54e2fef587d5 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -174,6 +174,12 @@ struct wm8994_pdata {
 	 * consumption will rise.
 	 */
 	bool ldo_ena_always_driven;
+
+	/*
+	 * SPKMODE must be pulled internally by the device on this
+	 * system.
+	 */
+	bool spkmode_pu;
 };
 
 #endif
-- 
cgit v1.2.3


From 4de45284d3927b5068de6ed972b11627a3428427 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 25 Oct 2011 15:44:12 +0200
Subject: mfd: Define some additional wm8994 registers

Add a bunch of definitions for wm8994 registers that are not currently
used by software.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8994/registers.h | 96 ++++++++++++++++++++++++++++++++++++
 sound/soc/codecs/wm8994-tables.c     | 12 ++---
 2 files changed, 102 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/registers.h b/include/linux/mfd/wm8994/registers.h
index 83a9caec0e43..8317b19a4972 100644
--- a/include/linux/mfd/wm8994/registers.h
+++ b/include/linux/mfd/wm8994/registers.h
@@ -95,11 +95,15 @@
 #define WM8994_FLL1_CONTROL_3                   0x222
 #define WM8994_FLL1_CONTROL_4                   0x223
 #define WM8994_FLL1_CONTROL_5                   0x224
+#define WM8958_FLL1_EFS_1                       0x226
+#define WM8958_FLL1_EFS_2                       0x227
 #define WM8994_FLL2_CONTROL_1                   0x240
 #define WM8994_FLL2_CONTROL_2                   0x241
 #define WM8994_FLL2_CONTROL_3                   0x242
 #define WM8994_FLL2_CONTROL_4                   0x243
 #define WM8994_FLL2_CONTROL_5                   0x244
+#define WM8958_FLL2_EFS_1                       0x246
+#define WM8958_FLL2_EFS_2                       0x247
 #define WM8994_AIF1_CONTROL_1                   0x300
 #define WM8994_AIF1_CONTROL_2                   0x301
 #define WM8994_AIF1_MASTER_SLAVE                0x302
@@ -116,6 +120,7 @@
 #define WM8994_AIF2DAC_LRCLK                    0x315
 #define WM8994_AIF2DAC_DATA                     0x316
 #define WM8994_AIF2ADC_DATA                     0x317
+#define WM1811_AIF2TX_CONTROL                   0x318
 #define WM8958_AIF3_CONTROL_1                   0x320
 #define WM8958_AIF3_CONTROL_2                   0x321
 #define WM8958_AIF3DAC_DATA                     0x322
@@ -166,6 +171,7 @@
 #define WM8994_AIF1_DAC1_EQ_BAND_5_A            0x491
 #define WM8994_AIF1_DAC1_EQ_BAND_5_B            0x492
 #define WM8994_AIF1_DAC1_EQ_BAND_5_PG           0x493
+#define WM8994_AIF1_DAC1_EQ_BAND_1_C            0x494
 #define WM8994_AIF1_DAC2_EQ_GAINS_1             0x4A0
 #define WM8994_AIF1_DAC2_EQ_GAINS_2             0x4A1
 #define WM8994_AIF1_DAC2_EQ_BAND_1_A            0x4A2
@@ -186,6 +192,7 @@
 #define WM8994_AIF1_DAC2_EQ_BAND_5_A            0x4B1
 #define WM8994_AIF1_DAC2_EQ_BAND_5_B            0x4B2
 #define WM8994_AIF1_DAC2_EQ_BAND_5_PG           0x4B3
+#define WM8994_AIF1_DAC2_EQ_BAND_1_C            0x4B4
 #define WM8994_AIF2_ADC_LEFT_VOLUME             0x500
 #define WM8994_AIF2_ADC_RIGHT_VOLUME            0x501
 #define WM8994_AIF2_DAC_LEFT_VOLUME             0x502
@@ -219,6 +226,7 @@
 #define WM8994_AIF2_EQ_BAND_5_A                 0x591
 #define WM8994_AIF2_EQ_BAND_5_B                 0x592
 #define WM8994_AIF2_EQ_BAND_5_PG                0x593
+#define WM8994_AIF2_EQ_BAND_1_C                 0x594
 #define WM8994_DAC1_MIXER_VOLUMES               0x600
 #define WM8994_DAC1_LEFT_MIXER_ROUTING          0x601
 #define WM8994_DAC1_RIGHT_MIXER_ROUTING         0x602
@@ -264,7 +272,43 @@
 #define WM8958_DSP2_RELEASETIME                 0xA03
 #define WM8958_DSP2_VERMAJMIN                   0xA04
 #define WM8958_DSP2_VERBUILD                    0xA05
+#define WM8958_DSP2_TESTREG                     0xA06
+#define WM8958_DSP2_XORREG                      0xA07
+#define WM8958_DSP2_SHIFTMAXX                   0xA08
+#define WM8958_DSP2_SHIFTMAXY                   0xA09
+#define WM8958_DSP2_SHIFTMAXZ                   0xA0A
+#define WM8958_DSP2_SHIFTMAXEXTLO               0xA0B
+#define WM8958_DSP2_AESSELECT                   0xA0C
 #define WM8958_DSP2_EXECCONTROL                 0xA0D
+#define WM8958_DSP2_SAMPLEBREAK                 0xA0E
+#define WM8958_DSP2_COUNTBREAK                  0xA0F
+#define WM8958_DSP2_INTSTATUS                   0xA10
+#define WM8958_DSP2_EVENTSTATUS                 0xA11
+#define WM8958_DSP2_INTMASK                     0xA12
+#define WM8958_DSP2_CONFIGDWIDTH                0xA13
+#define WM8958_DSP2_CONFIGINSTR                 0xA14
+#define WM8958_DSP2_CONFIGDMEM                  0xA15
+#define WM8958_DSP2_CONFIGDELAYS                0xA16
+#define WM8958_DSP2_CONFIGNUMIO                 0xA17
+#define WM8958_DSP2_CONFIGEXTDEPTH              0xA18
+#define WM8958_DSP2_CONFIGMULTIPLIER            0xA19
+#define WM8958_DSP2_CONFIGCTRLDWIDTH            0xA1A
+#define WM8958_DSP2_CONFIGPIPELINE              0xA1B
+#define WM8958_DSP2_SHIFTMAXEXTHI               0xA1C
+#define WM8958_DSP2_SWVERSIONREG                0xA1D
+#define WM8958_DSP2_CONFIGXMEM                  0xA1E
+#define WM8958_DSP2_CONFIGYMEM                  0xA1F
+#define WM8958_DSP2_CONFIGZMEM                  0xA20
+#define WM8958_FW_BUILD_1                       0x2000
+#define WM8958_FW_BUILD_0                       0x2001
+#define WM8958_FW_ID_1                          0x2002
+#define WM8958_FW_ID_0                          0x2003
+#define WM8958_FW_MAJOR_1                       0x2004
+#define WM8958_FW_MAJOR_0                       0x2005
+#define WM8958_FW_MINOR_1                       0x2006
+#define WM8958_FW_MINOR_0                       0x2007
+#define WM8958_FW_PATCH_1                       0x2008
+#define WM8958_FW_PATCH_0                       0x2009
 #define WM8958_MBC_BAND_2_LOWER_CUTOFF_C1_1     0x2200
 #define WM8958_MBC_BAND_2_LOWER_CUTOFF_C1_2     0x2201
 #define WM8958_MBC_BAND_2_LOWER_CUTOFF_C2_1     0x2202
@@ -333,6 +377,14 @@
 #define WM8958_MBC_B2_PG2_2                     0x242D
 #define WM8958_MBC_B1_PG2_1                     0x242E
 #define WM8958_MBC_B1_PG2_2                     0x242F
+#define WM8958_MBC_CROSSOVER_1                  0x2600
+#define WM8958_MBC_CROSSOVER_2                  0x2601
+#define WM8958_MBC_HPF_1                        0x2602
+#define WM8958_MBC_HPF_2                        0x2603
+#define WM8958_MBC_LPF_1                        0x2606
+#define WM8958_MBC_LPF_2                        0x2607
+#define WM8958_MBC_RMS_LIMIT_1                  0x260A
+#define WM8958_MBC_RMS_LIMIT_2                  0x260B
 #define WM8994_WRITE_SEQUENCER_0                0x3000
 #define WM8994_WRITE_SEQUENCER_1                0x3001
 #define WM8994_WRITE_SEQUENCER_2                0x3002
@@ -2389,6 +2441,10 @@
 /*
  * R548 (0x224) - FLL1 Control (5)
  */
+#define WM8958_FLL1_BYP                         0x8000  /* FLL1_BYP */
+#define WM8958_FLL1_BYP_MASK                    0x8000  /* FLL1_BYP */
+#define WM8958_FLL1_BYP_SHIFT                       15  /* FLL1_BYP */
+#define WM8958_FLL1_BYP_WIDTH                        1  /* FLL1_BYP */
 #define WM8994_FLL1_FRC_NCO_VAL_MASK            0x1F80  /* FLL1_FRC_NCO_VAL - [12:7] */
 #define WM8994_FLL1_FRC_NCO_VAL_SHIFT                7  /* FLL1_FRC_NCO_VAL - [12:7] */
 #define WM8994_FLL1_FRC_NCO_VAL_WIDTH                6  /* FLL1_FRC_NCO_VAL - [12:7] */
@@ -2403,6 +2459,24 @@
 #define WM8994_FLL1_REFCLK_SRC_SHIFT                 0  /* FLL1_REFCLK_SRC - [1:0] */
 #define WM8994_FLL1_REFCLK_SRC_WIDTH                 2  /* FLL1_REFCLK_SRC - [1:0] */
 
+/*
+ * R550 (0x226) - FLL1 EFS 1
+ */
+#define WM8958_FLL1_LAMBDA_MASK                 0xFFFF  /* FLL1_LAMBDA - [15:0] */
+#define WM8958_FLL1_LAMBDA_SHIFT                     0  /* FLL1_LAMBDA - [15:0] */
+#define WM8958_FLL1_LAMBDA_WIDTH                    16  /* FLL1_LAMBDA - [15:0] */
+
+/*
+ * R551 (0x227) - FLL1 EFS 2
+ */
+#define WM8958_FLL1_LFSR_SEL_MASK               0x0006  /* FLL1_LFSR_SEL - [2:1] */
+#define WM8958_FLL1_LFSR_SEL_SHIFT                   1  /* FLL1_LFSR_SEL - [2:1] */
+#define WM8958_FLL1_LFSR_SEL_WIDTH                   2  /* FLL1_LFSR_SEL - [2:1] */
+#define WM8958_FLL1_EFS_ENA                     0x0001  /* FLL1_EFS_ENA */
+#define WM8958_FLL1_EFS_ENA_MASK                0x0001  /* FLL1_EFS_ENA */
+#define WM8958_FLL1_EFS_ENA_SHIFT                    0  /* FLL1_EFS_ENA */
+#define WM8958_FLL1_EFS_ENA_WIDTH                    1  /* FLL1_EFS_ENA */
+
 /*
  * R576 (0x240) - FLL2 Control (1)
  */
@@ -2452,6 +2526,10 @@
 /*
  * R580 (0x244) - FLL2 Control (5)
  */
+#define WM8958_FLL2_BYP                         0x8000  /* FLL2_BYP */
+#define WM8958_FLL2_BYP_MASK                    0x8000  /* FLL2_BYP */
+#define WM8958_FLL2_BYP_SHIFT                       15  /* FLL2_BYP */
+#define WM8958_FLL2_BYP_WIDTH                        1  /* FLL2_BYP */
 #define WM8994_FLL2_FRC_NCO_VAL_MASK            0x1F80  /* FLL2_FRC_NCO_VAL - [12:7] */
 #define WM8994_FLL2_FRC_NCO_VAL_SHIFT                7  /* FLL2_FRC_NCO_VAL - [12:7] */
 #define WM8994_FLL2_FRC_NCO_VAL_WIDTH                6  /* FLL2_FRC_NCO_VAL - [12:7] */
@@ -2466,6 +2544,24 @@
 #define WM8994_FLL2_REFCLK_SRC_SHIFT                 0  /* FLL2_REFCLK_SRC - [1:0] */
 #define WM8994_FLL2_REFCLK_SRC_WIDTH                 2  /* FLL2_REFCLK_SRC - [1:0] */
 
+/*
+ * R582 (0x246) - FLL2 EFS 1
+ */
+#define WM8958_FLL2_LAMBDA_MASK                 0xFFFF  /* FLL2_LAMBDA - [15:0] */
+#define WM8958_FLL2_LAMBDA_SHIFT                     0  /* FLL2_LAMBDA - [15:0] */
+#define WM8958_FLL2_LAMBDA_WIDTH                    16  /* FLL2_LAMBDA - [15:0] */
+
+/*
+ * R583 (0x247) - FLL2 EFS 2
+ */
+#define WM8958_FLL2_LFSR_SEL_MASK               0x0006  /* FLL2_LFSR_SEL - [2:1] */
+#define WM8958_FLL2_LFSR_SEL_SHIFT                   1  /* FLL2_LFSR_SEL - [2:1] */
+#define WM8958_FLL2_LFSR_SEL_WIDTH                   2  /* FLL2_LFSR_SEL - [2:1] */
+#define WM8958_FLL2_EFS_ENA                     0x0001  /* FLL2_EFS_ENA */
+#define WM8958_FLL2_EFS_ENA_MASK                0x0001  /* FLL2_EFS_ENA */
+#define WM8958_FLL2_EFS_ENA_SHIFT                    0  /* FLL2_EFS_ENA */
+#define WM8958_FLL2_EFS_ENA_WIDTH                    1  /* FLL2_EFS_ENA */
+
 /*
  * R768 (0x300) - AIF1 Control (1)
  */
diff --git a/sound/soc/codecs/wm8994-tables.c b/sound/soc/codecs/wm8994-tables.c
index df5a8b9a250f..6ed19d9e7454 100644
--- a/sound/soc/codecs/wm8994-tables.c
+++ b/sound/soc/codecs/wm8994-tables.c
@@ -78,7 +78,7 @@ const struct wm8994_access_mask wm8994_access_masks[WM8994_CACHE_SIZE] = {
 	{ 0x0000, 0x0000 }, /* R74 */
 	{ 0x0000, 0x0000 }, /* R75 */
 	{ 0x8000, 0x8000 }, /* R76    - Charge Pump (1) */
-	{ 0x0000, 0x0000 }, /* R77 */
+	{ 0x8000, 0x8000 }, /* R77    - Charge Pump (2) */
 	{ 0x0000, 0x0000 }, /* R78 */
 	{ 0x0000, 0x0000 }, /* R79 */
 	{ 0x0000, 0x0000 }, /* R80 */
@@ -1651,7 +1651,7 @@ const u16 wm8994_reg_defaults[WM8994_CACHE_SIZE] = {
 	0x0000,     /* R74 */
 	0x0000,     /* R75 */
 	0x1F25,     /* R76    - Charge Pump (1) */
-	0x0000,     /* R77 */
+	0xAB19,     /* R77    - Charge Pump (2) */
 	0x0000,     /* R78 */
 	0x0000,     /* R79 */
 	0x0000,     /* R80 */
@@ -2124,8 +2124,8 @@ const u16 wm8994_reg_defaults[WM8994_CACHE_SIZE] = {
 	0x0000,     /* R547   - FLL1 Control (4) */
 	0x0C80,     /* R548   - FLL1 Control (5) */
 	0x0000,     /* R549 */
-	0x0000,     /* R550 */
-	0x0000,     /* R551 */
+	0x0000,     /* R550   - FLL1 EFS 1 */
+	0x0006,     /* R551   - FLL1 EFS 2 */
 	0x0000,     /* R552 */
 	0x0000,     /* R553 */
 	0x0000,     /* R554 */
@@ -2156,8 +2156,8 @@ const u16 wm8994_reg_defaults[WM8994_CACHE_SIZE] = {
 	0x0000,     /* R579   - FLL2 Control (4) */
 	0x0C80,     /* R580   - FLL2 Control (5) */
 	0x0000,     /* R581 */
-	0x0000,     /* R582 */
-	0x0000,     /* R583 */
+	0x0000,     /* R582   - FLL2 EFS 1 */
+	0x0006,     /* R583   - FLL2 EFS 2 */
 	0x0000,     /* R584 */
 	0x0000,     /* R585 */
 	0x0000,     /* R586 */
-- 
cgit v1.2.3


From c3f1386171a100d27d9fb978f474a6a330888af5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 25 Oct 2011 14:23:53 +0200
Subject: mfd: Enable register cache for wm8994 devices

As part of this we provide information about the registers that exist in
the device to the regmap core, drop the small amount of cache that the
core had been using and let regmap do the sync.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8994-core.c       | 82 +++++++++++++++--------------------------
 include/linux/mfd/wm8994/core.h |  2 -
 2 files changed, 30 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index 016769475ffb..aafac5b5f3a5 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -28,11 +28,7 @@
 #include <linux/mfd/wm8994/pdata.h>
 #include <linux/mfd/wm8994/registers.h>
 
-static int wm8994_read(struct wm8994 *wm8994, unsigned short reg,
-		       int bytes, void *dest)
-{
-	return regmap_raw_read(wm8994->regmap, reg, dest, bytes);
-}
+#include "wm8994.h"
 
 /**
  * wm8994_reg_read: Read a single WM8994 register.
@@ -68,12 +64,6 @@ int wm8994_bulk_read(struct wm8994 *wm8994, unsigned short reg,
 	return regmap_bulk_read(wm8994->regmap, reg, buf, count);
 }
 
-static int wm8994_write(struct wm8994 *wm8994, unsigned short reg,
-			int bytes, const void *src)
-{
-	return regmap_raw_write(wm8994->regmap, reg, src, bytes);
-}
-
 /**
  * wm8994_reg_write: Write a single WM8994 register.
  *
@@ -258,27 +248,14 @@ static int wm8994_suspend(struct device *dev)
 				WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD,
 				WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD);
 
-	/* GPIO configuration state is saved here since we may be configuring
-	 * the GPIO alternate functions even if we're not using the gpiolib
-	 * driver for them.
-	 */
-	ret = wm8994_read(wm8994, WM8994_GPIO_1, WM8994_NUM_GPIO_REGS * 2,
-			  &wm8994->gpio_regs);
-	if (ret < 0)
-		dev_err(dev, "Failed to save GPIO registers: %d\n", ret);
-
-	/* For similar reasons we also stash the regulator states */
-	ret = wm8994_read(wm8994, WM8994_LDO_1, WM8994_NUM_LDO_REGS * 2,
-			  &wm8994->ldo_regs);
-	if (ret < 0)
-		dev_err(dev, "Failed to save LDO registers: %d\n", ret);
-
 	/* Explicitly put the device into reset in case regulators
 	 * don't get disabled in order to ensure consistent restart.
 	 */
 	wm8994_reg_write(wm8994, WM8994_SOFTWARE_RESET,
 			 wm8994_reg_read(wm8994, WM8994_SOFTWARE_RESET));
 
+	regcache_mark_dirty(wm8994->regmap);
+
 	wm8994->suspended = true;
 
 	ret = regulator_bulk_disable(wm8994->num_supplies,
@@ -294,7 +271,7 @@ static int wm8994_suspend(struct device *dev)
 static int wm8994_resume(struct device *dev)
 {
 	struct wm8994 *wm8994 = dev_get_drvdata(dev);
-	int ret, i;
+	int ret;
 
 	/* We may have lied to the PM core about suspending */
 	if (!wm8994->suspended)
@@ -307,27 +284,12 @@ static int wm8994_resume(struct device *dev)
 		return ret;
 	}
 
-	/* Write register at a time as we use the cache on the CPU so store
-	 * it in native endian.
-	 */
-	for (i = 0; i < ARRAY_SIZE(wm8994->irq_masks_cur); i++) {
-		ret = wm8994_reg_write(wm8994, WM8994_INTERRUPT_STATUS_1_MASK
-				       + i, wm8994->irq_masks_cur[i]);
-		if (ret < 0)
-			dev_err(dev, "Failed to restore interrupt masks: %d\n",
-				ret);
+	ret = regcache_sync(wm8994->regmap);
+	if (ret != 0) {
+		dev_err(dev, "Failed to restore register map: %d\n", ret);
+		goto err_enable;
 	}
 
-	ret = wm8994_write(wm8994, WM8994_LDO_1, WM8994_NUM_LDO_REGS * 2,
-			   &wm8994->ldo_regs);
-	if (ret < 0)
-		dev_err(dev, "Failed to restore LDO registers: %d\n", ret);
-
-	ret = wm8994_write(wm8994, WM8994_GPIO_1, WM8994_NUM_GPIO_REGS * 2,
-			   &wm8994->gpio_regs);
-	if (ret < 0)
-		dev_err(dev, "Failed to restore GPIO registers: %d\n", ret);
-
 	/* Disable LDO pulldowns while the device is active */
 	wm8994_set_bits(wm8994, WM8994_PULL_CONTROL_2,
 			WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD,
@@ -336,6 +298,11 @@ static int wm8994_resume(struct device *dev)
 	wm8994->suspended = false;
 
 	return 0;
+
+err_enable:
+	regulator_bulk_disable(wm8994->num_supplies, wm8994->supplies);
+
+	return ret;
 }
 #endif
 
@@ -361,11 +328,6 @@ static int wm8994_ldo_in_use(struct wm8994_pdata *pdata, int ldo)
 }
 #endif
 
-static struct regmap_config wm8994_regmap_config = {
-	.reg_bits = 16,
-	.val_bits = 16,
-};
-
 /*
  * Instantiate the generic non-control parts of the device.
  */
@@ -594,6 +556,7 @@ static int wm8994_i2c_probe(struct i2c_client *i2c,
 			    const struct i2c_device_id *id)
 {
 	struct wm8994 *wm8994;
+	struct regmap_config *regmap_config;
 	int ret;
 
 	wm8994 = devm_kzalloc(&i2c->dev, sizeof(struct wm8994), GFP_KERNEL);
@@ -605,7 +568,22 @@ static int wm8994_i2c_probe(struct i2c_client *i2c,
 	wm8994->irq = i2c->irq;
 	wm8994->type = id->driver_data;
 
-	wm8994->regmap = regmap_init_i2c(i2c, &wm8994_regmap_config);
+	switch (wm8994->type) {
+	case WM1811:
+		regmap_config = &wm1811_regmap_config;
+		break;
+	case WM8994:
+		regmap_config = &wm8994_regmap_config;
+		break;
+	case WM8958:
+		regmap_config = &wm8958_regmap_config;
+		break;
+	default:
+		dev_err(wm8994->dev, "Unknown device type %d\n", wm8994->type);
+		return -EINVAL;
+	}
+
+	wm8994->regmap = regmap_init_i2c(i2c, regmap_config);
 	if (IS_ERR(wm8994->regmap)) {
 		ret = PTR_ERR(wm8994->regmap);
 		dev_err(wm8994->dev, "Failed to allocate register map: %d\n",
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index f44bdb7273bd..d98593d52e7c 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -70,8 +70,6 @@ struct wm8994 {
 
 	/* Used over suspend/resume */
 	bool suspended;
-	u16 ldo_regs[WM8994_NUM_LDO_REGS];
-	u16 gpio_regs[WM8994_NUM_GPIO_REGS];
 
 	struct regulator_dev *dbvdd;
 	int num_supplies;
-- 
cgit v1.2.3


From 43913e5ef9e9e05141418577523456e6b23777eb Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 28 Nov 2011 18:48:14 +0000
Subject: mfd: Constify WM8994 regulator_init_data

The driver has no need to modify the regulator_init_data so declare it
const to allow machine code to do so.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8994/pdata.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index 54e2fef587d5..b00897a6c461 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -23,7 +23,7 @@ struct wm8994_ldo_pdata {
 	int enable;
 
 	const char *supply;
-	struct regulator_init_data *init_data;
+	const struct regulator_init_data *init_data;
 };
 
 #define WM8994_CONFIGURE_GPIO 0x10000
-- 
cgit v1.2.3


From 19f9557174d61fcfe132a6846a83c36437ff014e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 1 Dec 2011 13:53:18 +0000
Subject: mfd: Add missing mutex.h inclusion to WM8994 core.h

struct wm8994 includes a mutex so we need to include mutex.h before we
declare it. All current users rely on this being done implicitly.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8994/core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index d98593d52e7c..f537d2eae390 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -15,6 +15,7 @@
 #ifndef __MFD_WM8994_CORE_H__
 #define __MFD_WM8994_CORE_H__
 
+#include <linux/mutex.h>
 #include <linux/interrupt.h>
 
 enum wm8994_type {
-- 
cgit v1.2.3


From 7ed5849c2861faf9c13f027868f635bd782a50e5 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 1 Dec 2011 13:55:49 +0000
Subject: mfd: Mark WM1811 GPIO6 register volatile for later revisions

For later chip revisions the WM1811 GPIO6 register is always volatile so
store the device revision when initialising the driver and then check at
runtime if we're running on a newer device.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8994-core.c       | 12 +++++++-----
 drivers/mfd/wm8994-regmap.c     | 19 +++++++++++++++++--
 include/linux/mfd/wm8994/core.h |  1 +
 3 files changed, 25 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index 8b4f22a88e22..93f8599aba32 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -446,15 +446,16 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 			ret);
 		goto err_enable;
 	}
+	wm8994->revision = ret;
 
 	switch (wm8994->type) {
 	case WM8994:
-		switch (ret) {
+		switch (wm8994->revision) {
 		case 0:
 		case 1:
 			dev_warn(wm8994->dev,
 				 "revision %c not fully supported\n",
-				 'A' + ret);
+				 'A' + wm8994->revision);
 			break;
 		default:
 			break;
@@ -462,14 +463,15 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 		break;
 	case WM1811:
 		/* Revision C did not change the relevant layer */
-		if (ret > 1)
-			ret++;
+		if (wm8994->revision > 1)
+			wm8994->revision++;
 		break;
 	default:
 		break;
 	}
 
-	dev_info(wm8994->dev, "%s revision %c\n", devname, 'A' + ret);
+	dev_info(wm8994->dev, "%s revision %c\n", devname,
+		 'A' + wm8994->revision);
 
 	switch (wm8994->type) {
 	case WM1811:
diff --git a/drivers/mfd/wm8994-regmap.c b/drivers/mfd/wm8994-regmap.c
index 03594c257ef2..c598ae69b8ff 100644
--- a/drivers/mfd/wm8994-regmap.c
+++ b/drivers/mfd/wm8994-regmap.c
@@ -12,6 +12,7 @@
  *
  */
 
+#include <linux/mfd/wm8994/core.h>
 #include <linux/mfd/wm8994/registers.h>
 #include <linux/regmap.h>
 
@@ -210,7 +211,6 @@ static struct reg_default wm1811_defaults[] = {
 	{ 0x0702, 0xA101 },    /* R1794 - Pull Control (BCLK2) */
 	{ 0x0703, 0xA101 },    /* R1795 - Pull Control (DACLRCLK2) */
 	{ 0x0704, 0xA101 },    /* R1796 - Pull Control (DACDAT2) */
-	{ 0x0705, 0xA101 },    /* R1797 - GPIO 6 */
 	{ 0x0707, 0xA101 },    /* R1799 - GPIO 8 */
 	{ 0x0708, 0xA101 },    /* R1800 - GPIO 9 */
 	{ 0x0709, 0xA101 },    /* R1801 - GPIO 10 */
@@ -1145,6 +1145,21 @@ static bool wm8994_volatile_register(struct device *dev, unsigned int reg)
 	}
 }
 
+static bool wm1811_volatile_register(struct device *dev, unsigned int reg)
+{
+	struct wm8994 *wm8994 = dev_get_drvdata(dev);
+
+	switch (reg) {
+	case WM8994_GPIO_6:
+		if (wm8994->revision > 1)
+			return true;
+		else
+			return false;
+	default:
+		return wm8994_volatile_register(dev, reg);
+	}
+}
+
 static bool wm8958_volatile_register(struct device *dev, unsigned int reg)
 {
 	switch (reg) {
@@ -1185,7 +1200,7 @@ struct regmap_config wm1811_regmap_config = {
 	.num_reg_defaults = ARRAY_SIZE(wm1811_defaults),
 
 	.max_register = WM8994_MAX_REGISTER,
-	.volatile_reg = wm8994_volatile_register,
+	.volatile_reg = wm1811_volatile_register,
 	.readable_reg = wm1811_readable_register,
 };
 
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index f537d2eae390..4dd4409678ce 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -56,6 +56,7 @@ struct wm8994 {
 	struct mutex irq_lock;
 
 	enum wm8994_type type;
+	int revision;
 
 	struct device *dev;
 	struct regmap *regmap;
-- 
cgit v1.2.3


From 8ab30691826fc05efa47c4ffba19b80496bb3a2c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 25 Oct 2011 10:19:04 +0200
Subject: mfd: Convert wm8994 to use generic regmap irq_chip

Factor out the irq_chip implementation, substantially reducing the code
size for the driver.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig             |   1 +
 drivers/mfd/wm8994-irq.c        | 196 +++++++---------------------------------
 include/linux/mfd/wm8994/core.h |   3 +-
 3 files changed, 35 insertions(+), 165 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index f1391c21ef26..017f6dbab333 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -477,6 +477,7 @@ config MFD_WM8994
 	bool "Support Wolfson Microelectronics WM8994"
 	select MFD_CORE
 	select REGMAP_I2C
+	select REGMAP_IRQ
 	depends on I2C=y && GENERIC_HARDIRQS
 	help
 	  The WM8994 is a highly integrated hi-fi CODEC designed for
diff --git a/drivers/mfd/wm8994-irq.c b/drivers/mfd/wm8994-irq.c
index f9dd6b691258..46b20c445ecf 100644
--- a/drivers/mfd/wm8994-irq.c
+++ b/drivers/mfd/wm8994-irq.c
@@ -18,238 +18,127 @@
 #include <linux/irq.h>
 #include <linux/mfd/core.h>
 #include <linux/interrupt.h>
+#include <linux/regmap.h>
 
 #include <linux/mfd/wm8994/core.h>
 #include <linux/mfd/wm8994/registers.h>
 
 #include <linux/delay.h>
 
-struct wm8994_irq_data {
-	int reg;
-	int mask;
-};
-
-static struct wm8994_irq_data wm8994_irqs[] = {
+static struct regmap_irq wm8994_irqs[] = {
 	[WM8994_IRQ_TEMP_SHUT] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_TEMP_SHUT_EINT,
 	},
 	[WM8994_IRQ_MIC1_DET] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_MIC1_DET_EINT,
 	},
 	[WM8994_IRQ_MIC1_SHRT] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_MIC1_SHRT_EINT,
 	},
 	[WM8994_IRQ_MIC2_DET] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_MIC2_DET_EINT,
 	},
 	[WM8994_IRQ_MIC2_SHRT] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_MIC2_SHRT_EINT,
 	},
 	[WM8994_IRQ_FLL1_LOCK] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_FLL1_LOCK_EINT,
 	},
 	[WM8994_IRQ_FLL2_LOCK] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_FLL2_LOCK_EINT,
 	},
 	[WM8994_IRQ_SRC1_LOCK] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_SRC1_LOCK_EINT,
 	},
 	[WM8994_IRQ_SRC2_LOCK] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_SRC2_LOCK_EINT,
 	},
 	[WM8994_IRQ_AIF1DRC1_SIG_DET] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_AIF1DRC1_SIG_DET,
 	},
 	[WM8994_IRQ_AIF1DRC2_SIG_DET] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_AIF1DRC2_SIG_DET_EINT,
 	},
 	[WM8994_IRQ_AIF2DRC_SIG_DET] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_AIF2DRC_SIG_DET_EINT,
 	},
 	[WM8994_IRQ_FIFOS_ERR] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_FIFOS_ERR_EINT,
 	},
 	[WM8994_IRQ_WSEQ_DONE] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_WSEQ_DONE_EINT,
 	},
 	[WM8994_IRQ_DCS_DONE] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_DCS_DONE_EINT,
 	},
 	[WM8994_IRQ_TEMP_WARN] = {
-		.reg = 2,
+		.reg_offset = 1,
 		.mask = WM8994_TEMP_WARN_EINT,
 	},
 	[WM8994_IRQ_GPIO(1)] = {
-		.reg = 1,
 		.mask = WM8994_GP1_EINT,
 	},
 	[WM8994_IRQ_GPIO(2)] = {
-		.reg = 1,
 		.mask = WM8994_GP2_EINT,
 	},
 	[WM8994_IRQ_GPIO(3)] = {
-		.reg = 1,
 		.mask = WM8994_GP3_EINT,
 	},
 	[WM8994_IRQ_GPIO(4)] = {
-		.reg = 1,
 		.mask = WM8994_GP4_EINT,
 	},
 	[WM8994_IRQ_GPIO(5)] = {
-		.reg = 1,
 		.mask = WM8994_GP5_EINT,
 	},
 	[WM8994_IRQ_GPIO(6)] = {
-		.reg = 1,
 		.mask = WM8994_GP6_EINT,
 	},
 	[WM8994_IRQ_GPIO(7)] = {
-		.reg = 1,
 		.mask = WM8994_GP7_EINT,
 	},
 	[WM8994_IRQ_GPIO(8)] = {
-		.reg = 1,
 		.mask = WM8994_GP8_EINT,
 	},
 	[WM8994_IRQ_GPIO(9)] = {
-		.reg = 1,
 		.mask = WM8994_GP8_EINT,
 	},
 	[WM8994_IRQ_GPIO(10)] = {
-		.reg = 1,
 		.mask = WM8994_GP10_EINT,
 	},
 	[WM8994_IRQ_GPIO(11)] = {
-		.reg = 1,
 		.mask = WM8994_GP11_EINT,
 	},
 };
 
-static inline struct wm8994_irq_data *irq_to_wm8994_irq(struct wm8994 *wm8994,
-							int irq)
-{
-	return &wm8994_irqs[irq - wm8994->irq_base];
-}
-
-static void wm8994_irq_lock(struct irq_data *data)
-{
-	struct wm8994 *wm8994 = irq_data_get_irq_chip_data(data);
-
-	mutex_lock(&wm8994->irq_lock);
-}
-
-static void wm8994_irq_sync_unlock(struct irq_data *data)
-{
-	struct wm8994 *wm8994 = irq_data_get_irq_chip_data(data);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(wm8994->irq_masks_cur); i++) {
-		/* If there's been a change in the mask write it back
-		 * to the hardware. */
-		if (wm8994->irq_masks_cur[i] != wm8994->irq_masks_cache[i]) {
-			wm8994->irq_masks_cache[i] = wm8994->irq_masks_cur[i];
-			wm8994_reg_write(wm8994,
-					 WM8994_INTERRUPT_STATUS_1_MASK + i,
-					 wm8994->irq_masks_cur[i]);
-		}
-	}
-
-	mutex_unlock(&wm8994->irq_lock);
-}
-
-static void wm8994_irq_enable(struct irq_data *data)
-{
-	struct wm8994 *wm8994 = irq_data_get_irq_chip_data(data);
-	struct wm8994_irq_data *irq_data = irq_to_wm8994_irq(wm8994,
-							     data->irq);
-
-	wm8994->irq_masks_cur[irq_data->reg - 1] &= ~irq_data->mask;
-}
-
-static void wm8994_irq_disable(struct irq_data *data)
-{
-	struct wm8994 *wm8994 = irq_data_get_irq_chip_data(data);
-	struct wm8994_irq_data *irq_data = irq_to_wm8994_irq(wm8994,
-							     data->irq);
-
-	wm8994->irq_masks_cur[irq_data->reg - 1] |= irq_data->mask;
-}
+static struct regmap_irq_chip wm8994_irq_chip = {
+	.name = "wm8994",
+	.irqs = wm8994_irqs,
+	.num_irqs = ARRAY_SIZE(wm8994_irqs),
 
-static struct irq_chip wm8994_irq_chip = {
-	.name			= "wm8994",
-	.irq_bus_lock		= wm8994_irq_lock,
-	.irq_bus_sync_unlock	= wm8994_irq_sync_unlock,
-	.irq_disable		= wm8994_irq_disable,
-	.irq_enable		= wm8994_irq_enable,
+	.num_regs = 2,
+	.status_base = WM8994_INTERRUPT_STATUS_1,
+	.mask_base = WM8994_INTERRUPT_STATUS_1_MASK,
+	.ack_base = WM8994_INTERRUPT_STATUS_1,
 };
 
-/* The processing of the primary interrupt occurs in a thread so that
- * we can interact with the device over I2C or SPI. */
-static irqreturn_t wm8994_irq_thread(int irq, void *data)
-{
-	struct wm8994 *wm8994 = data;
-	unsigned int i;
-	u16 status[WM8994_NUM_IRQ_REGS];
-	int ret;
-
-	ret = wm8994_bulk_read(wm8994, WM8994_INTERRUPT_STATUS_1,
-			       WM8994_NUM_IRQ_REGS, status);
-	if (ret < 0) {
-		dev_err(wm8994->dev, "Failed to read interrupt status: %d\n",
-			ret);
-		return IRQ_NONE;
-	}
-
-	/* Bit swap and apply masking */
-	for (i = 0; i < WM8994_NUM_IRQ_REGS; i++) {
-		status[i] = be16_to_cpu(status[i]);
-		status[i] &= ~wm8994->irq_masks_cur[i];
-	}
-
-	/* Ack any unmasked IRQs */
-	for (i = 0; i < ARRAY_SIZE(status); i++) {
-		if (status[i])
-			wm8994_reg_write(wm8994, WM8994_INTERRUPT_STATUS_1 + i,
-					 status[i]);
-	}
-
-	/* Report */
-	for (i = 0; i < ARRAY_SIZE(wm8994_irqs); i++) {
-		if (status[wm8994_irqs[i].reg - 1] & wm8994_irqs[i].mask)
-			handle_nested_irq(wm8994->irq_base + i);
-	}
-
-	return IRQ_HANDLED;
-}
-
 int wm8994_irq_init(struct wm8994 *wm8994)
 {
-	int i, cur_irq, ret;
-
-	mutex_init(&wm8994->irq_lock);
-
-	/* Mask the individual interrupt sources */
-	for (i = 0; i < ARRAY_SIZE(wm8994->irq_masks_cur); i++) {
-		wm8994->irq_masks_cur[i] = 0xffff;
-		wm8994->irq_masks_cache[i] = 0xffff;
-		wm8994_reg_write(wm8994, WM8994_INTERRUPT_STATUS_1_MASK + i,
-				 0xffff);
-	}
+	int ret;
 
 	if (!wm8994->irq) {
 		dev_warn(wm8994->dev,
@@ -264,30 +153,12 @@ int wm8994_irq_init(struct wm8994 *wm8994)
 		return 0;
 	}
 
-	/* Register them with genirq */
-	for (cur_irq = wm8994->irq_base;
-	     cur_irq < ARRAY_SIZE(wm8994_irqs) + wm8994->irq_base;
-	     cur_irq++) {
-		irq_set_chip_data(cur_irq, wm8994);
-		irq_set_chip_and_handler(cur_irq, &wm8994_irq_chip,
-					 handle_edge_irq);
-		irq_set_nested_thread(cur_irq, 1);
-
-		/* ARM needs us to explicitly flag the IRQ as valid
-		 * and will set them noprobe when we do so. */
-#ifdef CONFIG_ARM
-		set_irq_flags(cur_irq, IRQF_VALID);
-#else
-		irq_set_noprobe(cur_irq);
-#endif
-	}
-
-	ret = request_threaded_irq(wm8994->irq, NULL, wm8994_irq_thread,
-				   IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
-				   "wm8994", wm8994);
+	ret = regmap_add_irq_chip(wm8994->regmap, wm8994->irq,
+				  IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+				  wm8994->irq_base, &wm8994_irq_chip,
+				  &wm8994->irq_data);
 	if (ret != 0) {
-		dev_err(wm8994->dev, "Failed to request IRQ %d: %d\n",
-			wm8994->irq, ret);
+		dev_err(wm8994->dev, "Failed to register IRQ chip: %d\n", ret);
 		return ret;
 	}
 
@@ -299,6 +170,5 @@ int wm8994_irq_init(struct wm8994 *wm8994)
 
 void wm8994_irq_exit(struct wm8994 *wm8994)
 {
-	if (wm8994->irq)
-		free_irq(wm8994->irq, wm8994);
+	regmap_del_irq_chip(wm8994->irq, wm8994->irq_data);
 }
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index 4dd4409678ce..9eff2a351ec5 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -67,8 +67,7 @@ struct wm8994 {
 	int irq_base;
 
 	int irq;
-	u16 irq_masks_cur[WM8994_NUM_IRQ_REGS];
-	u16 irq_masks_cache[WM8994_NUM_IRQ_REGS];
+	struct regmap_irq_chip_data *irq_data;
 
 	/* Used over suspend/resume */
 	bool suspended;
-- 
cgit v1.2.3


From ba2780c796badfc3741c7cb499a575ca49f17e6d Mon Sep 17 00:00:00 2001
From: Manu Abraham <abraham.manu@gmail.com>
Date: Sun, 13 Nov 2011 18:47:44 -0300
Subject: [media] DVB: Query DVB frontend delivery capabilities

 Currently, for any multi-standard frontend it is assumed that it just
 has a single standard capability. This is fine in some cases, but
 makes things hard when there are incompatible standards in conjunction.
 E.g. DVB-S can be seen as a subset of DVB-S2, but the same doesn't hold
 for DSS. This is not specific to any driver as it is, but a
 generic issue. This was handled correctly in the multiproto tree,
 while such functionality is missing from the v5 API update.

 http://www.linuxtv.org/pipermail/vdr/2008-November/018417.html

 Later on, FE_CAN_2G_MODULATION was added as a hack to work around this
 issue in the v5 API, but that hack is incapable of addressing the
 issue, as it can be used to simply distinguish between DVB-S and
 DVB-S2 alone, or another X vs X2 modulation. If there are more systems,
 then you have a potential issue.

 An application needs to query the device capabilities before requesting
 any operation from the device.

Signed-off-by: Manu Abraham <abraham.manu@gmail.com>
Acked-by: Andreas Oberritter <obi@linuxtv.org>
Acked-by: Oliver Endriss <o.endriss@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb/dvb-core/dvb_frontend.c | 36 +++++++++++++++++++++++++++++++
 include/linux/dvb/frontend.h              |  4 +++-
 include/linux/dvb/version.h               |  2 +-
 3 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c
index c849455458ea..821b2250ec70 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.c
@@ -974,6 +974,8 @@ static struct dtv_cmds_h dtv_cmds[DTV_MAX_COMMAND + 1] = {
 	_DTV_CMD(DTV_GUARD_INTERVAL, 0, 0),
 	_DTV_CMD(DTV_TRANSMISSION_MODE, 0, 0),
 	_DTV_CMD(DTV_HIERARCHY, 0, 0),
+
+	_DTV_CMD(DTV_ENUM_DELSYS, 0, 0),
 };
 
 static void dtv_property_dump(struct dtv_property *tvp)
@@ -1209,6 +1211,37 @@ static int dvb_frontend_ioctl_legacy(struct file *file,
 static int dvb_frontend_ioctl_properties(struct file *file,
 			unsigned int cmd, void *parg);
 
+static void dtv_set_default_delivery_caps(const struct dvb_frontend *fe, struct dtv_property *p)
+{
+	const struct dvb_frontend_info *info = &fe->ops.info;
+	u32 ncaps = 0;
+
+	switch (info->type) {
+	case FE_QPSK:
+		p->u.buffer.data[ncaps++] = SYS_DVBS;
+		if (info->caps & FE_CAN_2G_MODULATION)
+			p->u.buffer.data[ncaps++] = SYS_DVBS2;
+		if (info->caps & FE_CAN_TURBO_FEC)
+			p->u.buffer.data[ncaps++] = SYS_TURBO;
+		break;
+	case FE_QAM:
+		p->u.buffer.data[ncaps++] = SYS_DVBC_ANNEX_AC;
+		break;
+	case FE_OFDM:
+		p->u.buffer.data[ncaps++] = SYS_DVBT;
+		if (info->caps & FE_CAN_2G_MODULATION)
+			p->u.buffer.data[ncaps++] = SYS_DVBT2;
+		break;
+	case FE_ATSC:
+		if (info->caps & (FE_CAN_8VSB | FE_CAN_16VSB))
+			p->u.buffer.data[ncaps++] = SYS_ATSC;
+		if (info->caps & (FE_CAN_QAM_16 | FE_CAN_QAM_64 | FE_CAN_QAM_128 | FE_CAN_QAM_256))
+			p->u.buffer.data[ncaps++] = SYS_DVBC_ANNEX_B;
+		break;
+	}
+	p->u.buffer.len = ncaps;
+}
+
 static int dtv_property_process_get(struct dvb_frontend *fe,
 				    struct dtv_property *tvp,
 				    struct file *file)
@@ -1229,6 +1262,9 @@ static int dtv_property_process_get(struct dvb_frontend *fe,
 	}
 
 	switch(tvp->cmd) {
+	case DTV_ENUM_DELSYS:
+		dtv_set_default_delivery_caps(fe, tvp);
+		break;
 	case DTV_FREQUENCY:
 		tvp->u.data = c->frequency;
 		break;
diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index d9251df867b5..cb114f52ccf7 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -316,7 +316,9 @@ struct dvb_frontend_event {
 
 #define DTV_DVBT2_PLP_ID	43
 
-#define DTV_MAX_COMMAND				DTV_DVBT2_PLP_ID
+#define DTV_ENUM_DELSYS		44
+
+#define DTV_MAX_COMMAND				DTV_ENUM_DELSYS
 
 typedef enum fe_pilot {
 	PILOT_ON,
diff --git a/include/linux/dvb/version.h b/include/linux/dvb/version.h
index 66594b1d5d7b..0559e2bd38f9 100644
--- a/include/linux/dvb/version.h
+++ b/include/linux/dvb/version.h
@@ -24,6 +24,6 @@
 #define _DVBVERSION_H_
 
 #define DVB_API_VERSION 5
-#define DVB_API_VERSION_MINOR 4
+#define DVB_API_VERSION_MINOR 5
 
 #endif /*_DVBVERSION_H_*/
-- 
cgit v1.2.3


From 6682bb86fe1f3dba7e9cb1f0955775604599cea8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 09:26:45 +0000
Subject: UAPI: Guard linux/sound.h

Place reinclusion guards on linux/sound.h otherwise the UAPI splitter script
won't insert a #include to make the kernel header include the UAPI header.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/sound.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sound.h b/include/linux/sound.h
index 44dcf0570432..fae20ba01fbf 100644
--- a/include/linux/sound.h
+++ b/include/linux/sound.h
@@ -1,3 +1,5 @@
+#ifndef _LINUX_SOUND_H
+#define _LINUX_SOUND_H
 
 /*
  * Minor numbers for the sound driver.
@@ -42,3 +44,5 @@ extern void unregister_sound_mixer(int unit);
 extern void unregister_sound_midi(int unit);
 extern void unregister_sound_dsp(int unit);
 #endif /* __KERNEL__ */
+
+#endif /* _LINUX_SOUND_H */
-- 
cgit v1.2.3


From c15a48d60697f06a847c5862b94a6e24d1b48a7b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 09:26:45 +0000
Subject: UAPI: Guard linux/isdn_divertif.h

Place reinclusion guards on linux/isdn_divertif.h otherwise the UAPI splitter
script won't insert the #include to include the UAPI header from the kernel
header.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/isdn_divertif.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/isdn_divertif.h b/include/linux/isdn_divertif.h
index 07821ca5955f..a5a50f523807 100644
--- a/include/linux/isdn_divertif.h
+++ b/include/linux/isdn_divertif.h
@@ -10,6 +10,8 @@
  *
  */
 
+#ifndef _LINUX_ISDN_DIVERTIF_H
+#define _LINUX_ISDN_DIVERTIF_H
 
 /***********************************************************/
 /* magic value is also used to control version information */
@@ -45,3 +47,5 @@ typedef struct
 /*********************/
 extern int DIVERT_REG_NAME(isdn_divert_if *);
 #endif
+
+#endif /* _LINUX_ISDN_DIVERTIF_H */
-- 
cgit v1.2.3


From 6c9f2ef9b5b3a84952ba2f99472f82e3398ac3e9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 09:26:45 +0000
Subject: UAPI: Guard linux/pmu.h

Place reinclusion guards on linux/pmu.h otherwise the UAPI splitter won't
insert the #include to include the UAPI header from the kernel header.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/pmu.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pmu.h b/include/linux/pmu.h
index cafe98d96948..84e6a55a1202 100644
--- a/include/linux/pmu.h
+++ b/include/linux/pmu.h
@@ -6,6 +6,8 @@
  * Copyright (C) 1998 Paul Mackerras.
  */
 
+#ifndef _LINUX_PMU_H
+#define _LINUX_PMU_H
 
 #define PMU_DRIVER_VERSION	2
 
@@ -207,3 +209,5 @@ extern int pmu_sys_suspended;
 #endif
 
 #endif	/* __KERNEL__ */
+
+#endif /* _LINUX_PMU_H */
-- 
cgit v1.2.3


From fde28451359b476ba065a9a16fa83aa44168fe59 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 09:26:45 +0000
Subject: UAPI: Guard linux/cuda.h

Place reinclusion guards on linux/cuda.h otherwise the UAPI splitter script
won't insert a #include to make the kernel header include the UAPI header.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/cuda.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cuda.h b/include/linux/cuda.h
index 6a3e6385d3f3..9f9865ff781e 100644
--- a/include/linux/cuda.h
+++ b/include/linux/cuda.h
@@ -5,6 +5,9 @@
  * Copyright (C) 1996 Paul Mackerras.
  */
 
+#ifndef _LINUX_CUDA_H
+#define _LINUX_CUDA_H
+
 /* CUDA commands (2nd byte) */
 #define CUDA_WARM_START		0
 #define CUDA_AUTOPOLL		1
@@ -34,3 +37,5 @@ extern int cuda_request(struct adb_request *req,
 extern void cuda_poll(void);
 
 #endif	/* __KERNEL */
+
+#endif /* _LINUX_CUDA_H */
-- 
cgit v1.2.3


From b18da0c56e9ff43a007b6c8e302c62e720964151 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Tue, 13 Dec 2011 11:58:49 +0100
Subject: fuse: support ioctl on directories

Multiplexing filesystems may want to support ioctls on the underlying
files and directories (e.g. FS_IOC_{GET,SET}FLAGS).

Ioctl support on directories was missing so add it now.

Reported-by: Antonio SJ Musumeci <bile@landofbile.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dir.c        | 26 ++++++++++++++++++++++++++
 fs/fuse/file.c       |  8 ++++----
 fs/fuse/fuse_i.h     |  2 ++
 include/linux/fuse.h |  7 ++++++-
 4 files changed, 38 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 9f63e493a9b6..344577933f62 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1182,6 +1182,30 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
 	return fuse_fsync_common(file, start, end, datasync, 1);
 }
 
+static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
+
+	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
+	if (fc->minor < 18)
+		return -ENOTTY;
+
+	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
+}
+
+static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
+				   unsigned long arg)
+{
+	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
+
+	if (fc->minor < 18)
+		return -ENOTTY;
+
+	return fuse_ioctl_common(file, cmd, arg,
+				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
+}
+
 static bool update_mtime(unsigned ivalid)
 {
 	/* Always update if mtime is explicitly set  */
@@ -1596,6 +1620,8 @@ static const struct file_operations fuse_dir_operations = {
 	.open		= fuse_dir_open,
 	.release	= fuse_dir_release,
 	.fsync		= fuse_dir_fsync,
+	.unlocked_ioctl	= fuse_dir_ioctl,
+	.compat_ioctl	= fuse_dir_compat_ioctl,
 };
 
 static const struct inode_operations fuse_common_inode_operations = {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c297425cba71..4a199fd93fbd 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1926,8 +1926,8 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 }
 EXPORT_SYMBOL_GPL(fuse_do_ioctl);
 
-static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
-				   unsigned long arg, unsigned int flags)
+long fuse_ioctl_common(struct file *file, unsigned int cmd,
+		       unsigned long arg, unsigned int flags)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
@@ -1944,13 +1944,13 @@ static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
 static long fuse_file_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
-	return fuse_file_ioctl_common(file, cmd, arg, 0);
+	return fuse_ioctl_common(file, cmd, arg, 0);
 }
 
 static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
 				   unsigned long arg)
 {
-	return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
+	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
 }
 
 /*
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index cf6db0a93219..09337bcc2554 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -765,6 +765,8 @@ ssize_t fuse_direct_io(struct file *file, const char __user *buf,
 		       size_t count, loff_t *ppos, int write);
 long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 		   unsigned int flags);
+long fuse_ioctl_common(struct file *file, unsigned int cmd,
+		       unsigned long arg, unsigned int flags);
 unsigned fuse_file_poll(struct file *file, poll_table *wait);
 int fuse_dev_release(struct inode *inode, struct file *file);
 
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 464cff526860..446c89718b9c 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -50,6 +50,9 @@
  *
  * 7.17
  *  - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
+ *
+ * 7.18
+ *  - add FUSE_IOCTL_DIR flag
  */
 
 #ifndef _LINUX_FUSE_H
@@ -81,7 +84,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 17
+#define FUSE_KERNEL_MINOR_VERSION 18
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -214,6 +217,7 @@ struct fuse_file_lock {
  * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
  * FUSE_IOCTL_RETRY: retry with new iovecs
  * FUSE_IOCTL_32BIT: 32bit ioctl
+ * FUSE_IOCTL_DIR: is a directory
  *
  * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
  */
@@ -221,6 +225,7 @@ struct fuse_file_lock {
 #define FUSE_IOCTL_UNRESTRICTED	(1 << 1)
 #define FUSE_IOCTL_RETRY	(1 << 2)
 #define FUSE_IOCTL_32BIT	(1 << 3)
+#define FUSE_IOCTL_DIR		(1 << 4)
 
 #define FUSE_IOCTL_MAX_IOV	256
 
-- 
cgit v1.2.3


From 451d0f599934fd97faf54a5d7954b518e66192cb Mon Sep 17 00:00:00 2001
From: John Muir <john@jmuir.com>
Date: Tue, 6 Dec 2011 21:50:06 +0100
Subject: FUSE: Notifying the kernel of deletion.

Allows a FUSE file-system to tell the kernel when a file or directory is
deleted. If the specified dentry has the specified inode number, the kernel will
unhash it.

The current 'fuse_notify_inval_entry' does not cause the kernel to clean up
directories that are in use properly, and as a result the users of those
directories see incorrect semantics from the file-system. The error condition
seen when 'fuse_notify_inval_entry' is used to notify of a deleted directory is
avoided when 'fuse_notify_delete' is used instead.

The following scenario demonstrates the difference:
1. User A chdirs into 'testdir' and starts reading 'testfile'.
2. User B rm -rf 'testdir'.
3. User B creates 'testdir'.
4. User C chdirs into 'testdir'.

If you run the above within the same machine on any file-system (including fuse
file-systems), there is no problem: user C is able to chdir into the new
testdir. The old testdir is removed from the dentry tree, but still open by user
A.

If operations 2 and 3 are performed via the network such that the fuse
file-system uses one of the notify functions to tell the kernel that the nodes
are gone, then the following error occurs for user C while user A holds the
original directory open:

muirj@empacher:~> ls /test/testdir
ls: cannot access /test/testdir: No such file or directory

The issue here is that the kernel still has a dentry for testdir, and so it is
requesting the attributes for the old directory, while the file-system is
responding that the directory no longer exists.

If, on the other hand, the file-system can notify the kernel that the
directory is deleted using the new 'fuse_notify_delete' function, then the above
ls will find the new directory as expected.

Signed-off-by: John Muir <john@jmuir.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dev.c        | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/fuse/dir.c        | 32 +++++++++++++++++++++++++++--
 fs/fuse/fuse_i.h     |  8 +++++++-
 include/linux/fuse.h |  9 +++++++++
 4 files changed, 102 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 2aaf3eaaf13d..5f3368ab0fa9 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1378,7 +1378,59 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
 	down_read(&fc->killsb);
 	err = -ENOENT;
 	if (fc->sb)
-		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
+		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
+	up_read(&fc->killsb);
+	kfree(buf);
+	return err;
+
+err:
+	kfree(buf);
+	fuse_copy_finish(cs);
+	return err;
+}
+
+static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
+			      struct fuse_copy_state *cs)
+{
+	struct fuse_notify_delete_out outarg;
+	int err = -ENOMEM;
+	char *buf;
+	struct qstr name;
+
+	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
+	if (!buf)
+		goto err;
+
+	err = -EINVAL;
+	if (size < sizeof(outarg))
+		goto err;
+
+	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+	if (err)
+		goto err;
+
+	err = -ENAMETOOLONG;
+	if (outarg.namelen > FUSE_NAME_MAX)
+		goto err;
+
+	err = -EINVAL;
+	if (size != sizeof(outarg) + outarg.namelen + 1)
+		goto err;
+
+	name.name = buf;
+	name.len = outarg.namelen;
+	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
+	if (err)
+		goto err;
+	fuse_copy_finish(cs);
+	buf[outarg.namelen] = 0;
+	name.hash = full_name_hash(name.name, name.len);
+
+	down_read(&fc->killsb);
+	err = -ENOENT;
+	if (fc->sb)
+		err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
+					       outarg.child, &name);
 	up_read(&fc->killsb);
 	kfree(buf);
 	return err;
@@ -1597,6 +1649,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
 	case FUSE_NOTIFY_RETRIEVE:
 		return fuse_notify_retrieve(fc, size, cs);
 
+	case FUSE_NOTIFY_DELETE:
+		return fuse_notify_delete(fc, size, cs);
+
 	default:
 		fuse_copy_finish(cs);
 		return -EINVAL;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 344577933f62..bef8c3011d31 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -868,7 +868,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
 }
 
 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
-			     struct qstr *name)
+			     u64 child_nodeid, struct qstr *name)
 {
 	int err = -ENOTDIR;
 	struct inode *parent;
@@ -895,8 +895,36 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
 
 	fuse_invalidate_attr(parent);
 	fuse_invalidate_entry(entry);
+
+	if (child_nodeid != 0 && entry->d_inode) {
+		mutex_lock(&entry->d_inode->i_mutex);
+		if (get_node_id(entry->d_inode) != child_nodeid) {
+			err = -ENOENT;
+			goto badentry;
+		}
+		if (d_mountpoint(entry)) {
+			err = -EBUSY;
+			goto badentry;
+		}
+		if (S_ISDIR(entry->d_inode->i_mode)) {
+			shrink_dcache_parent(entry);
+			if (!simple_empty(entry)) {
+				err = -ENOTEMPTY;
+				goto badentry;
+			}
+			entry->d_inode->i_flags |= S_DEAD;
+		}
+		dont_mount(entry);
+		clear_nlink(entry->d_inode);
+		err = 0;
+ badentry:
+		mutex_unlock(&entry->d_inode->i_mutex);
+		if (!err)
+			d_delete(entry);
+	} else {
+		err = 0;
+	}
 	dput(entry);
-	err = 0;
 
  unlock:
 	mutex_unlock(&parent->i_mutex);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 09337bcc2554..a571584a091a 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -755,9 +755,15 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
 /**
  * File-system tells the kernel to invalidate parent attributes and
  * the dentry matching parent/name.
+ *
+ * If the child_nodeid is non-zero and:
+ *    - matches the inode number for the dentry matching parent/name,
+ *    - is not a mount point
+ *    - is a file or oan empty directory
+ * then the dentry is unhashed (d_delete()).
  */
 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
-			     struct qstr *name);
+			     u64 child_nodeid, struct qstr *name);
 
 int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
 		 bool isdir);
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 446c89718b9c..8ba2c9460b28 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -53,6 +53,7 @@
  *
  * 7.18
  *  - add FUSE_IOCTL_DIR flag
+ *  - add FUSE_NOTIFY_DELETE
  */
 
 #ifndef _LINUX_FUSE_H
@@ -288,6 +289,7 @@ enum fuse_notify_code {
 	FUSE_NOTIFY_INVAL_ENTRY = 3,
 	FUSE_NOTIFY_STORE = 4,
 	FUSE_NOTIFY_RETRIEVE = 5,
+	FUSE_NOTIFY_DELETE = 6,
 	FUSE_NOTIFY_CODE_MAX,
 };
 
@@ -611,6 +613,13 @@ struct fuse_notify_inval_entry_out {
 	__u32	padding;
 };
 
+struct fuse_notify_delete_out {
+	__u64	parent;
+	__u64	child;
+	__u32	namelen;
+	__u32	padding;
+};
+
 struct fuse_notify_store_out {
 	__u64	nodeid;
 	__u64	offset;
-- 
cgit v1.2.3


From a648bd0c9f613d8f9954eccff6009ecfb26e2722 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 15:07:49 +0000
Subject: UAPI: Make linux/patchkey.h easier to parse

Make linux/patchkey.h easier to parse by making the #elif case associated with
the __KERNEL__ guard a nested #if in a #else of the __KERNEL__ guard.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/patchkey.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/patchkey.h b/include/linux/patchkey.h
index d974a6e92372..aefda0ec6e62 100644
--- a/include/linux/patchkey.h
+++ b/include/linux/patchkey.h
@@ -32,7 +32,8 @@
 #  else
 #    error "could not determine byte order"
 #  endif
-#elif defined(__BYTE_ORDER)
+#else
+#if defined(__BYTE_ORDER)
 #  if __BYTE_ORDER == __BIG_ENDIAN
 #    define _PATCHKEY(id) (0xfd00|id)
 #  elif __BYTE_ORDER == __LITTLE_ENDIAN
@@ -41,5 +42,6 @@
 #    error "could not determine byte order"
 #  endif
 #endif
+#endif
 
 #endif /* _LINUX_PATCHKEY_H */
-- 
cgit v1.2.3


From 989e986f5b1748fe3ff714954dc0d23780c43a9c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 15:07:49 +0000
Subject: UAPI: Fix AHZ multiple inclusion when __KERNEL__ is removed

Fix AHZ multiple inclusion when __KERNEL__ is removed as part of the separation
of the userspace headers from the kernel headers.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/acct.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acct.h b/include/linux/acct.h
index 3e4737fa6cce..d537aa0ec414 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -146,6 +146,9 @@ extern void acct_exit_ns(struct pid_namespace *);
  *
  */
 
+#undef ACCT_VERSION
+#undef AHZ
+
 #ifdef CONFIG_BSD_PROCESS_ACCT_V3
 #define ACCT_VERSION	3
 #define AHZ		100
-- 
cgit v1.2.3


From fdc29805bd7cae133303045fc0249d76f3827613 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 15:07:49 +0000
Subject: UAPI: Don't have a #elif clause in a __KERNEL__ guard in
 linux/soundcard.h

Don't have a #elif clause in a __KERNEL__ guard in linux/soundcard.h to make
parsing easier.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/soundcard.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/soundcard.h b/include/linux/soundcard.h
index fe204fe39f7c..dfcf86f013a9 100644
--- a/include/linux/soundcard.h
+++ b/include/linux/soundcard.h
@@ -198,7 +198,8 @@ typedef struct seq_event_rec {
 #  else
 #    error "could not determine byte order"
 #  endif
-#elif defined(__BYTE_ORDER)
+#else
+# if defined(__BYTE_ORDER)
 #  if __BYTE_ORDER == __BIG_ENDIAN
 #    define AFMT_S16_NE AFMT_S16_BE
 #  elif __BYTE_ORDER == __LITTLE_ENDIAN
@@ -206,6 +207,7 @@ typedef struct seq_event_rec {
 #  else
 #    error "could not determine byte order"
 #  endif
+# endif
 #endif
 
 /*
-- 
cgit v1.2.3


From 1632b9e2a14ce9f4e08faf6c4380431d63319bd3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Dec 2011 15:07:49 +0000
Subject: UAPI: Split trivial #if defined(__KERNEL__) && X conditionals

Split trivial #if defined(__KERNEL__) && X conditionals to make automated
disintegration easier.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 arch/arm/include/asm/hwcap.h       |  4 +++-
 arch/arm/include/asm/swab.h        |  7 +++++--
 arch/arm/include/asm/unistd.h      |  4 +++-
 arch/ia64/include/asm/intrinsics.h | 21 +++++++++++++--------
 arch/mips/include/asm/types.h      | 10 +++++++---
 arch/s390/include/asm/mman.h       |  4 +++-
 arch/tile/include/asm/signal.h     |  4 +++-
 include/linux/mroute6.h            |  4 +++-
 8 files changed, 40 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/hwcap.h b/arch/arm/include/asm/hwcap.h
index c93a22a8b924..917626128a1d 100644
--- a/arch/arm/include/asm/hwcap.h
+++ b/arch/arm/include/asm/hwcap.h
@@ -25,7 +25,8 @@
 #define HWCAP_IDIVT	(1 << 18)
 #define HWCAP_IDIV	(HWCAP_IDIVA | HWCAP_IDIVT)
 
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#if defined(__KERNEL__)
+#if !defined(__ASSEMBLY__)
 /*
  * This yields a mask that user programs can use to figure out what
  * instruction set this cpu supports.
@@ -33,5 +34,6 @@
 #define ELF_HWCAP	(elf_hwcap)
 extern unsigned int elf_hwcap;
 #endif
+#endif
 
 #endif
diff --git a/arch/arm/include/asm/swab.h b/arch/arm/include/asm/swab.h
index 9997ad20eff1..e82adf64d1dd 100644
--- a/arch/arm/include/asm/swab.h
+++ b/arch/arm/include/asm/swab.h
@@ -22,7 +22,8 @@
 #  define __SWAB_64_THRU_32__
 #endif
 
-#if defined(__KERNEL__) && __LINUX_ARM_ARCH__ >= 6
+#if defined(__KERNEL__)
+#if __LINUX_ARM_ARCH__ >= 6
 
 static inline __attribute_const__ __u16 __arch_swab16(__u16 x)
 {
@@ -38,8 +39,10 @@ static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
 }
 #define __arch_swab32 __arch_swab32
 
-#else
+#endif
+#endif
 
+#if !defined(__KERNEL__) || __LINUX_ARM_ARCH__ < 6
 static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
 {
 	__u32 t;
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 4a1123783806..512cd1473454 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -427,7 +427,8 @@
 /*
  * The following syscalls are obsolete and no longer available for EABI.
  */
-#if defined(__ARM_EABI__) && !defined(__KERNEL__)
+#if !defined(__KERNEL__)
+#if defined(__ARM_EABI__)
 #undef __NR_time
 #undef __NR_umount
 #undef __NR_stime
@@ -441,6 +442,7 @@
 #undef __NR_syscall
 #undef __NR_ipc
 #endif
+#endif
 
 #ifdef __KERNEL__
 
diff --git a/arch/ia64/include/asm/intrinsics.h b/arch/ia64/include/asm/intrinsics.h
index 111ed5222892..e4076b511829 100644
--- a/arch/ia64/include/asm/intrinsics.h
+++ b/arch/ia64/include/asm/intrinsics.h
@@ -201,16 +201,21 @@ extern long ia64_cmpxchg_called_with_bad_pointer (void);
 #endif
 
 #ifndef __ASSEMBLY__
-#if defined(CONFIG_PARAVIRT) && defined(__KERNEL__)
-#ifdef ASM_SUPPORTED
-# define IA64_INTRINSIC_API(name)	paravirt_ ## name
-#else
-# define IA64_INTRINSIC_API(name)	pv_cpu_ops.name
-#endif
-#define IA64_INTRINSIC_MACRO(name)	paravirt_ ## name
-#else
+
 #define IA64_INTRINSIC_API(name)	ia64_native_ ## name
 #define IA64_INTRINSIC_MACRO(name)	ia64_native_ ## name
+
+#if defined(__KERNEL__)
+#if defined(CONFIG_PARAVIRT)
+# undef IA64_INTRINSIC_API
+# undef IA64_INTRINSIC_MACRO
+# ifdef ASM_SUPPORTED
+#  define IA64_INTRINSIC_API(name)	paravirt_ ## name
+# else
+#  define IA64_INTRINSIC_API(name)	pv_cpu_ops.name
+# endif
+#define IA64_INTRINSIC_MACRO(name)	paravirt_ ## name
+#endif
 #endif
 
 /************************************************/
diff --git a/arch/mips/include/asm/types.h b/arch/mips/include/asm/types.h
index 533812b61881..9b96461bc1e7 100644
--- a/arch/mips/include/asm/types.h
+++ b/arch/mips/include/asm/types.h
@@ -15,10 +15,14 @@
  * We don't use int-l64.h for the kernel anymore but still use it for
  * userspace to avoid code changes.
  */
-#if (_MIPS_SZLONG == 64) && !defined(__KERNEL__)
-# include <asm-generic/int-l64.h>
-#else
+#ifdef __KERNEL__
 # include <asm-generic/int-ll64.h>
+#else
+# if _MIPS_SZLONG == 64
+#  include <asm-generic/int-l64.h>
+# else
+#  include <asm-generic/int-ll64.h>
+# endif
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
index 4e9c8ae0a637..d49760e63506 100644
--- a/arch/s390/include/asm/mman.h
+++ b/arch/s390/include/asm/mman.h
@@ -11,9 +11,11 @@
 
 #include <asm-generic/mman.h>
 
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__) && defined(CONFIG_64BIT)
+#if defined(__KERNEL__)
+#if !defined(__ASSEMBLY__) && defined(CONFIG_64BIT)
 int s390_mmap_check(unsigned long addr, unsigned long len);
 #define arch_mmap_check(addr,len,flags)	s390_mmap_check(addr,len)
 #endif
+#endif
 
 #endif /* __S390_MMAN_H__ */
diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h
index 1e1e616783eb..1e5e49aad548 100644
--- a/arch/tile/include/asm/signal.h
+++ b/arch/tile/include/asm/signal.h
@@ -23,7 +23,8 @@
 
 #include <asm-generic/signal.h>
 
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#if defined(__KERNEL__)
+#if !defined(__ASSEMBLY__)
 struct pt_regs;
 int restore_sigcontext(struct pt_regs *, struct sigcontext __user *);
 int setup_sigcontext(struct sigcontext __user *, struct pt_regs *);
@@ -33,5 +34,6 @@ void signal_fault(const char *type, struct pt_regs *,
 void trace_unhandled_signal(const char *type, struct pt_regs *regs,
 			    unsigned long address, int signo);
 #endif
+#endif
 
 #endif /* _ASM_TILE_SIGNAL_H */
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index a3759cb0ac10..6d8c7251eb8d 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -43,9 +43,11 @@ typedef unsigned short mifi_t;
 typedef	__u32		if_mask;
 #define NIFBITS (sizeof(if_mask) * 8)        /* bits per mask */
 
-#if !defined(__KERNEL__) && !defined(DIV_ROUND_UP)
+#if !defined(__KERNEL__)
+#if !defined(DIV_ROUND_UP)
 #define	DIV_ROUND_UP(x,y)	(((x) + ((y) - 1)) / (y))
 #endif
+#endif
 
 typedef struct if_set {
 	if_mask ifs_bits[DIV_ROUND_UP(IF_SETSIZE, NIFBITS)];
-- 
cgit v1.2.3


From 1ba64edef6051d2ec79bb2fbd3a0c8f0df00ab55 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:37 +0100
Subject: block, sx8: kill blk_insert_request()

The only user left for blk_insert_request() is sx8 and it can be
trivially switched to use blk_execute_rq_nowait() - special requests
aren't included in io stat and sx8 doesn't use block layer tagging.
Switch sx8 and kill blk_insert_request().

This patch doesn't introduce any functional difference.

Only compile tested.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Jeff Garzik <jgarzik@pobox.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 48 ------------------------------------------------
 drivers/block/sx8.c    | 12 ++++++++----
 include/linux/blkdev.h |  1 -
 3 files changed, 8 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index ea70e6c80cd3..435af2378614 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1010,54 +1010,6 @@ static void add_acct_request(struct request_queue *q, struct request *rq,
 	__elv_add_request(q, rq, where);
 }
 
-/**
- * blk_insert_request - insert a special request into a request queue
- * @q:		request queue where request should be inserted
- * @rq:		request to be inserted
- * @at_head:	insert request at head or tail of queue
- * @data:	private data
- *
- * Description:
- *    Many block devices need to execute commands asynchronously, so they don't
- *    block the whole kernel from preemption during request execution.  This is
- *    accomplished normally by inserting aritficial requests tagged as
- *    REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
- *    be scheduled for actual execution by the request queue.
- *
- *    We have the option of inserting the head or the tail of the queue.
- *    Typically we use the tail for new ioctls and so forth.  We use the head
- *    of the queue for things like a QUEUE_FULL message from a device, or a
- *    host that is unable to accept a particular command.
- */
-void blk_insert_request(struct request_queue *q, struct request *rq,
-			int at_head, void *data)
-{
-	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
-	unsigned long flags;
-
-	/*
-	 * tell I/O scheduler that this isn't a regular read/write (ie it
-	 * must not attempt merges on this) and that it acts as a soft
-	 * barrier
-	 */
-	rq->cmd_type = REQ_TYPE_SPECIAL;
-
-	rq->special = data;
-
-	spin_lock_irqsave(q->queue_lock, flags);
-
-	/*
-	 * If command is tagged, release the tag
-	 */
-	if (blk_rq_tagged(rq))
-		blk_queue_end_tag(q, rq);
-
-	add_acct_request(q, rq, where);
-	__blk_run_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-}
-EXPORT_SYMBOL(blk_insert_request);
-
 static void part_round_stats_single(int cpu, struct hd_struct *part,
 				    unsigned long now)
 {
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index b70f0fca9a42..e7472f567c9d 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -619,8 +619,10 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
 	       host->state == HST_DEV_SCAN);
 	spin_unlock_irq(&host->lock);
 
-	DPRINTK("blk_insert_request, tag == %u\n", idx);
-	blk_insert_request(host->oob_q, crq->rq, 1, crq);
+	DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
+	crq->rq->cmd_type = REQ_TYPE_SPECIAL;
+	crq->rq->special = crq;
+	blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
 
 	return 0;
 
@@ -658,8 +660,10 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
 	BUG_ON(rc < 0);
 	crq->msg_bucket = (u32) rc;
 
-	DPRINTK("blk_insert_request, tag == %u\n", idx);
-	blk_insert_request(host->oob_q, crq->rq, 1, crq);
+	DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
+	crq->rq->cmd_type = REQ_TYPE_SPECIAL;
+	crq->rq->special = crq;
+	blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
 
 	return 0;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c7a6d3b5bc7b..8a6b51b13a1c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -660,7 +660,6 @@ extern void __blk_put_request(struct request_queue *, struct request *);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern struct request *blk_make_request(struct request_queue *, struct bio *,
 					gfp_t);
-extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern void blk_add_request_payload(struct request *rq, struct page *page,
 		unsigned int len);
-- 
cgit v1.2.3


From 34f6055c80285e4efb3f602a9119db75239744dc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:37 +0100
Subject: block: add blk_queue_dead()

There are a number of QUEUE_FLAG_DEAD tests.  Add blk_queue_dead()
macro and use it.

This patch doesn't introduce any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 6 +++---
 block/blk-exec.c       | 2 +-
 block/blk-sysfs.c      | 4 ++--
 block/blk-throttle.c   | 4 ++--
 block/blk.h            | 2 +-
 include/linux/blkdev.h | 1 +
 6 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 435af2378614..b5ed4f4a8d96 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -608,7 +608,7 @@ EXPORT_SYMBOL(blk_init_allocated_queue_node);
 
 int blk_get_queue(struct request_queue *q)
 {
-	if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+	if (likely(!blk_queue_dead(q))) {
 		kobject_get(&q->kobj);
 		return 0;
 	}
@@ -755,7 +755,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
 	int may_queue;
 
-	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+	if (unlikely(blk_queue_dead(q)))
 		return NULL;
 
 	may_queue = elv_may_queue(q, rw_flags);
@@ -875,7 +875,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
 		struct io_context *ioc;
 		struct request_list *rl = &q->rq;
 
-		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+		if (unlikely(blk_queue_dead(q)))
 			return NULL;
 
 		prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
diff --git a/block/blk-exec.c b/block/blk-exec.c
index a1ebceb332f9..60532852b3ab 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -50,7 +50,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 {
 	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
 
-	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+	if (unlikely(blk_queue_dead(q))) {
 		rq->errors = -ENXIO;
 		if (rq->end_io)
 			rq->end_io(rq, rq->errors);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e7f9f657f105..f0b2ca8f66d0 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -425,7 +425,7 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 	if (!entry->show)
 		return -EIO;
 	mutex_lock(&q->sysfs_lock);
-	if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
+	if (blk_queue_dead(q)) {
 		mutex_unlock(&q->sysfs_lock);
 		return -ENOENT;
 	}
@@ -447,7 +447,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
 
 	q = container_of(kobj, struct request_queue, kobj);
 	mutex_lock(&q->sysfs_lock);
-	if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
+	if (blk_queue_dead(q)) {
 		mutex_unlock(&q->sysfs_lock);
 		return -ENOENT;
 	}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 4553245d9317..5eed6a76721d 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -310,7 +310,7 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
 	struct request_queue *q = td->queue;
 
 	/* no throttling for dead queue */
-	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+	if (unlikely(blk_queue_dead(q)))
 		return NULL;
 
 	rcu_read_lock();
@@ -335,7 +335,7 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
 	spin_lock_irq(q->queue_lock);
 
 	/* Make sure @q is still alive */
-	if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
+	if (unlikely(blk_queue_dead(q))) {
 		kfree(tg);
 		return NULL;
 	}
diff --git a/block/blk.h b/block/blk.h
index 3f6551b3c92d..e38691dbb329 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -85,7 +85,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 			q->flush_queue_delayed = 1;
 			return NULL;
 		}
-		if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags) ||
+		if (unlikely(blk_queue_dead(q)) ||
 		    !q->elevator->ops->elevator_dispatch_fn(q, 0))
 			return NULL;
 	}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8a6b51b13a1c..783f97c14d0a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -481,6 +481,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
+#define blk_queue_dead(q)	test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
 #define blk_queue_noxmerges(q)	\
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
-- 
cgit v1.2.3


From a73f730d013ff2788389fd0c46ad3e5510f124e6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:37 +0100
Subject: block, cfq: move cfqd->cic_index to q->id

cfq allocates per-queue id using ida and uses it to index cic radix
tree from io_context.  Move it to q->id and allocate on queue init and
free on queue release.  This simplifies cfq a bit and will allow for
further improvements of io context life-cycle management.

This patch doesn't introduce any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 24 +++++++++++++++--------
 block/blk-sysfs.c      |  2 ++
 block/blk.h            |  3 +++
 block/cfq-iosched.c    | 52 +++++---------------------------------------------
 include/linux/blkdev.h |  6 ++++++
 5 files changed, 32 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 30add45a87ef..af7301581172 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
+DEFINE_IDA(blk_queue_ida);
+
 /*
  * For the allocated request tables
  */
@@ -474,6 +476,10 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q)
 		return NULL;
 
+	q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
+	if (q->id < 0)
+		goto fail_q;
+
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
@@ -481,15 +487,11 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->backing_dev_info.name = "block";
 
 	err = bdi_init(&q->backing_dev_info);
-	if (err) {
-		kmem_cache_free(blk_requestq_cachep, q);
-		return NULL;
-	}
+	if (err)
+		goto fail_id;
 
-	if (blk_throtl_init(q)) {
-		kmem_cache_free(blk_requestq_cachep, q);
-		return NULL;
-	}
+	if (blk_throtl_init(q))
+		goto fail_id;
 
 	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
 		    laptop_mode_timer_fn, (unsigned long) q);
@@ -512,6 +514,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	q->queue_lock = &q->__queue_lock;
 
 	return q;
+
+fail_id:
+	ida_simple_remove(&blk_queue_ida, q->id);
+fail_q:
+	kmem_cache_free(blk_requestq_cachep, q);
+	return NULL;
 }
 EXPORT_SYMBOL(blk_alloc_queue_node);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index f0b2ca8f66d0..5b4b4ab5e785 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -494,6 +494,8 @@ static void blk_release_queue(struct kobject *kobj)
 	blk_trace_shutdown(q);
 
 	bdi_destroy(&q->backing_dev_info);
+
+	ida_simple_remove(&blk_queue_ida, q->id);
 	kmem_cache_free(blk_requestq_cachep, q);
 }
 
diff --git a/block/blk.h b/block/blk.h
index e38691dbb329..aae4d88fc523 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -1,6 +1,8 @@
 #ifndef BLK_INTERNAL_H
 #define BLK_INTERNAL_H
 
+#include <linux/idr.h>
+
 /* Amount of time in which a process may batch requests */
 #define BLK_BATCH_TIME	(HZ/50UL)
 
@@ -9,6 +11,7 @@
 
 extern struct kmem_cache *blk_requestq_cachep;
 extern struct kobj_type blk_queue_ktype;
+extern struct ida blk_queue_ida;
 
 void init_request_from_bio(struct request *req, struct bio *bio);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 16ace89613bc..ec3f5e8ba564 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -65,9 +65,6 @@ static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
 static struct completion *ioc_gone;
 static DEFINE_SPINLOCK(ioc_gone_lock);
 
-static DEFINE_SPINLOCK(cic_index_lock);
-static DEFINE_IDA(cic_index_ida);
-
 #define CFQ_PRIO_LISTS		IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
 #define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
@@ -290,7 +287,6 @@ struct cfq_data {
 	unsigned int cfq_group_idle;
 	unsigned int cfq_latency;
 
-	unsigned int cic_index;
 	struct list_head cic_list;
 
 	/*
@@ -484,7 +480,7 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
 
 static inline void *cfqd_dead_key(struct cfq_data *cfqd)
 {
-	return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
+	return (void *)(cfqd->queue->id << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
 }
 
 static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
@@ -3105,7 +3101,7 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
 	BUG_ON(rcu_dereference_check(ioc->ioc_data,
 		lockdep_is_held(&ioc->lock)) == cic);
 
-	radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
+	radix_tree_delete(&ioc->radix_root, cfqd->queue->id);
 	hlist_del_rcu(&cic->cic_list);
 	spin_unlock_irqrestore(&ioc->lock, flags);
 
@@ -3133,7 +3129,7 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 	}
 
 	do {
-		cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index);
+		cic = radix_tree_lookup(&ioc->radix_root, cfqd->queue->id);
 		rcu_read_unlock();
 		if (!cic)
 			break;
@@ -3169,8 +3165,7 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
 		cic->key = cfqd;
 
 		spin_lock_irqsave(&ioc->lock, flags);
-		ret = radix_tree_insert(&ioc->radix_root,
-						cfqd->cic_index, cic);
+		ret = radix_tree_insert(&ioc->radix_root, cfqd->queue->id, cic);
 		if (!ret)
 			hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
 		spin_unlock_irqrestore(&ioc->lock, flags);
@@ -3944,10 +3939,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
 	cfq_shutdown_timer_wq(cfqd);
 
-	spin_lock(&cic_index_lock);
-	ida_remove(&cic_index_ida, cfqd->cic_index);
-	spin_unlock(&cic_index_lock);
-
 	/*
 	 * Wait for cfqg->blkg->key accessors to exit their grace periods.
 	 * Do this wait only if there are other unlinked groups out
@@ -3969,24 +3960,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
 	kfree(cfqd);
 }
 
-static int cfq_alloc_cic_index(void)
-{
-	int index, error;
-
-	do {
-		if (!ida_pre_get(&cic_index_ida, GFP_KERNEL))
-			return -ENOMEM;
-
-		spin_lock(&cic_index_lock);
-		error = ida_get_new(&cic_index_ida, &index);
-		spin_unlock(&cic_index_lock);
-		if (error && error != -EAGAIN)
-			return error;
-	} while (error);
-
-	return index;
-}
-
 static void *cfq_init_queue(struct request_queue *q)
 {
 	struct cfq_data *cfqd;
@@ -3994,23 +3967,9 @@ static void *cfq_init_queue(struct request_queue *q)
 	struct cfq_group *cfqg;
 	struct cfq_rb_root *st;
 
-	i = cfq_alloc_cic_index();
-	if (i < 0)
-		return NULL;
-
 	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
-	if (!cfqd) {
-		spin_lock(&cic_index_lock);
-		ida_remove(&cic_index_ida, i);
-		spin_unlock(&cic_index_lock);
+	if (!cfqd)
 		return NULL;
-	}
-
-	/*
-	 * Don't need take queue_lock in the routine, since we are
-	 * initializing the ioscheduler, and nobody is using cfqd
-	 */
-	cfqd->cic_index = i;
 
 	/* Init root service tree */
 	cfqd->grp_service_tree = CFQ_RB_ROOT;
@@ -4294,7 +4253,6 @@ static void __exit cfq_exit(void)
 	 */
 	if (elv_ioc_count_read(cfq_ioc_count))
 		wait_for_completion(&all_gone);
-	ida_destroy(&cic_index_ida);
 	cfq_slab_kill();
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 783f97c14d0a..8c8dbc4738ea 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -310,6 +310,12 @@ struct request_queue {
 	 */
 	unsigned long		queue_flags;
 
+	/*
+	 * ida allocated id for this queue.  Used to index queues from
+	 * ioctx.
+	 */
+	int			id;
+
 	/*
 	 * queue needs bounce pages for pages above this limit
 	 */
-- 
cgit v1.2.3


From 42ec57a8f68311bbbf4ff96a5d33c8a2e90b9d05 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:37 +0100
Subject: block: misc ioc cleanups

* int return from put_io_context() wasn't used by anybody.  Make it
  return void like other put functions and docbook-fy the function
  comment.

* Reorder dummy declarations for !CONFIG_BLOCK case a bit.

* Make alloc_io_context() use __GFP_ZERO allocation, take init out of
  if block and drop 0'ing.

* Docbook-fy current_io_context() comment.

This patch doesn't introduce any functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-ioc.c           | 72 +++++++++++++++++++++++------------------------
 include/linux/iocontext.h | 12 ++------
 2 files changed, 39 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 6f9bbd978653..8bebf06bac76 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -27,26 +27,28 @@ static void cfq_dtor(struct io_context *ioc)
 	}
 }
 
-/*
- * IO Context helper functions. put_io_context() returns 1 if there are no
- * more users of this io context, 0 otherwise.
+/**
+ * put_io_context - put a reference of io_context
+ * @ioc: io_context to put
+ *
+ * Decrement reference count of @ioc and release it if the count reaches
+ * zero.
  */
-int put_io_context(struct io_context *ioc)
+void put_io_context(struct io_context *ioc)
 {
 	if (ioc == NULL)
-		return 1;
+		return;
 
-	BUG_ON(atomic_long_read(&ioc->refcount) == 0);
+	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
 
-	if (atomic_long_dec_and_test(&ioc->refcount)) {
-		rcu_read_lock();
-		cfq_dtor(ioc);
-		rcu_read_unlock();
+	if (!atomic_long_dec_and_test(&ioc->refcount))
+		return;
 
-		kmem_cache_free(iocontext_cachep, ioc);
-		return 1;
-	}
-	return 0;
+	rcu_read_lock();
+	cfq_dtor(ioc);
+	rcu_read_unlock();
+
+	kmem_cache_free(iocontext_cachep, ioc);
 }
 EXPORT_SYMBOL(put_io_context);
 
@@ -84,33 +86,31 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
 {
 	struct io_context *ioc;
 
-	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
-	if (ioc) {
-		atomic_long_set(&ioc->refcount, 1);
-		atomic_set(&ioc->nr_tasks, 1);
-		spin_lock_init(&ioc->lock);
-		ioc->ioprio_changed = 0;
-		ioc->ioprio = 0;
-		ioc->last_waited = 0; /* doesn't matter... */
-		ioc->nr_batch_requests = 0; /* because this is 0 */
-		INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
-		INIT_HLIST_HEAD(&ioc->cic_list);
-		ioc->ioc_data = NULL;
-#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
-		ioc->cgroup_changed = 0;
-#endif
-	}
+	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
+				    node);
+	if (unlikely(!ioc))
+		return NULL;
+
+	/* initialize */
+	atomic_long_set(&ioc->refcount, 1);
+	atomic_set(&ioc->nr_tasks, 1);
+	spin_lock_init(&ioc->lock);
+	INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
+	INIT_HLIST_HEAD(&ioc->cic_list);
 
 	return ioc;
 }
 
-/*
- * If the current task has no IO context then create one and initialise it.
- * Otherwise, return its existing IO context.
+/**
+ * current_io_context - get io_context of %current
+ * @gfp_flags: allocation flags, used if allocation is necessary
+ * @node: allocation node, used if allocation is necessary
  *
- * This returned IO context doesn't have a specifically elevated refcount,
- * but since the current task itself holds a reference, the context can be
- * used in general code, so long as it stays within `current` context.
+ * Return io_context of %current.  If it doesn't exist, it is created with
+ * @gfp_flags and @node.  The returned io_context does NOT have its
+ * reference count incremented.  Because io_context is exited only on task
+ * exit, %current can be sure that the returned io_context is valid and
+ * alive as long as it is executing.
  */
 struct io_context *current_io_context(gfp_t gfp_flags, int node)
 {
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 5037a0ad2312..8a6ecb66346f 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -76,20 +76,14 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
 
 struct task_struct;
 #ifdef CONFIG_BLOCK
-int put_io_context(struct io_context *ioc);
+void put_io_context(struct io_context *ioc);
 void exit_io_context(struct task_struct *task);
 struct io_context *get_io_context(gfp_t gfp_flags, int node);
 struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
 #else
-static inline void exit_io_context(struct task_struct *task)
-{
-}
-
 struct io_context;
-static inline int put_io_context(struct io_context *ioc)
-{
-	return 1;
-}
+static inline void put_io_context(struct io_context *ioc) { }
+static inline void exit_io_context(struct task_struct *task) { }
 #endif
 
 #endif
-- 
cgit v1.2.3


From 6e736be7f282fff705db7c34a15313281b372a76 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:38 +0100
Subject: block: make ioc get/put interface more conventional and fix race on
 allocation

Ignoring copy_io() during fork, io_context can be allocated from two
places - current_io_context() and set_task_ioprio().  The former is
always called from local task while the latter can be called from
different task.  The synchronization between them is peculiar and
dubious.

* current_io_context() doesn't grab task_lock() and assumes that if it
  saw %NULL ->io_context, it would stay that way until allocation and
  assignment is complete.  It has smp_wmb() between alloc/init and
  assignment.

* set_task_ioprio() grabs task_lock() for assignment and does
  smp_read_barrier_depends() between "ioc = task->io_context" and "if
  (ioc)".  Unfortunately, this doesn't achieve anything - the latter
  is not a dependent load of the former.  ie, if ioc itself were being
  dereferenced "ioc->xxx", it would mean something (not sure what tho)
  but as the code currently stands, the dependent read barrier is
  noop.

As only one of the two test-assignment sequences is task_lock()
protected, the task_lock() can't do much about race between the two.
Nothing prevents current_io_context() and set_task_ioprio() from each
allocating its own ioc for the same task and overwriting the other's.

Also, set_task_ioprio() can race with exiting task and create a new
ioc after exit_io_context() is finished.

ioc get/put doesn't have any reason to be complex.  The only hot path
is accessing the existing ioc of %current, which is simple to achieve
given that ->io_context is never destroyed as long as the task is
alive.  All other paths can happily go through task_lock() like all
other task sub structures without impacting anything.

This patch updates ioc get/put so that it becomes more conventional.

* alloc_io_context() is replaced with get_task_io_context().  This is
  the only interface which can acquire access to ioc of another task.
  On return, the caller has an explicit reference to the object which
  should be put using put_io_context() afterwards.

* The functionality of current_io_context() remains the same but when
  creating a new ioc, it shares the code path with
  get_task_io_context() and always goes through task_lock().

* get_io_context() now means incrementing ref on an ioc which the
  caller already has access to (be that an explicit refcnt or implicit
  %current one).

* PF_EXITING inhibits creation of new io_context and once
  exit_io_context() is finished, it's guaranteed that both ioc
  acquisition functions return %NULL.

* All users are updated.  Most are trivial but
  smp_read_barrier_depends() removal from cfq_get_io_context() needs a
  bit of explanation.  I suppose the original intention was to ensure
  ioc->ioprio is visible when set_task_ioprio() allocates new
  io_context and installs it; however, this wouldn't have worked
  because set_task_ioprio() doesn't have wmb between init and install.
  There are other problems with this which will be fixed in another
  patch.

* While at it, use NUMA_NO_NODE instead of -1 for wildcard node
  specification.

-v2: Vivek spotted contamination from debug patch.  Removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c        |  9 +++--
 block/blk-ioc.c           | 99 +++++++++++++++++++++++++++++++----------------
 block/blk.h               |  1 +
 block/cfq-iosched.c       | 18 ++++-----
 fs/ioprio.c               | 21 ++--------
 include/linux/iocontext.h |  4 +-
 kernel/fork.c             |  8 ++--
 7 files changed, 91 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 8f630cec906e..4b001dcd85b0 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1645,11 +1645,12 @@ static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
 	struct io_context *ioc;
 
-	task_lock(tsk);
-	ioc = tsk->io_context;
-	if (ioc)
+	/* we don't lose anything even if ioc allocation fails */
+	ioc = get_task_io_context(tsk, GFP_ATOMIC, NUMA_NO_NODE);
+	if (ioc) {
 		ioc->cgroup_changed = 1;
-	task_unlock(tsk);
+		put_io_context(ioc);
+	}
 }
 
 void blkio_policy_register(struct blkio_policy_type *blkiop)
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 8bebf06bac76..b13ed96776c2 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -16,6 +16,19 @@
  */
 static struct kmem_cache *iocontext_cachep;
 
+/**
+ * get_io_context - increment reference count to io_context
+ * @ioc: io_context to get
+ *
+ * Increment reference count to @ioc.
+ */
+void get_io_context(struct io_context *ioc)
+{
+	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
+	atomic_long_inc(&ioc->refcount);
+}
+EXPORT_SYMBOL(get_io_context);
+
 static void cfq_dtor(struct io_context *ioc)
 {
 	if (!hlist_empty(&ioc->cic_list)) {
@@ -71,6 +84,9 @@ void exit_io_context(struct task_struct *task)
 {
 	struct io_context *ioc;
 
+	/* PF_EXITING prevents new io_context from being attached to @task */
+	WARN_ON_ONCE(!(current->flags & PF_EXITING));
+
 	task_lock(task);
 	ioc = task->io_context;
 	task->io_context = NULL;
@@ -82,7 +98,9 @@ void exit_io_context(struct task_struct *task)
 	put_io_context(ioc);
 }
 
-struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
+static struct io_context *create_task_io_context(struct task_struct *task,
+						 gfp_t gfp_flags, int node,
+						 bool take_ref)
 {
 	struct io_context *ioc;
 
@@ -98,6 +116,20 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
 	INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
 	INIT_HLIST_HEAD(&ioc->cic_list);
 
+	/* try to install, somebody might already have beaten us to it */
+	task_lock(task);
+
+	if (!task->io_context && !(task->flags & PF_EXITING)) {
+		task->io_context = ioc;
+	} else {
+		kmem_cache_free(iocontext_cachep, ioc);
+		ioc = task->io_context;
+	}
+
+	if (ioc && take_ref)
+		get_io_context(ioc);
+
+	task_unlock(task);
 	return ioc;
 }
 
@@ -114,46 +146,47 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
  */
 struct io_context *current_io_context(gfp_t gfp_flags, int node)
 {
-	struct task_struct *tsk = current;
-	struct io_context *ret;
-
-	ret = tsk->io_context;
-	if (likely(ret))
-		return ret;
-
-	ret = alloc_io_context(gfp_flags, node);
-	if (ret) {
-		/* make sure set_task_ioprio() sees the settings above */
-		smp_wmb();
-		tsk->io_context = ret;
-	}
+	might_sleep_if(gfp_flags & __GFP_WAIT);
 
-	return ret;
+	if (current->io_context)
+		return current->io_context;
+
+	return create_task_io_context(current, gfp_flags, node, false);
 }
+EXPORT_SYMBOL(current_io_context);
 
-/*
- * If the current task has no IO context then create one and initialise it.
- * If it does have a context, take a ref on it.
+/**
+ * get_task_io_context - get io_context of a task
+ * @task: task of interest
+ * @gfp_flags: allocation flags, used if allocation is necessary
+ * @node: allocation node, used if allocation is necessary
+ *
+ * Return io_context of @task.  If it doesn't exist, it is created with
+ * @gfp_flags and @node.  The returned io_context has its reference count
+ * incremented.
  *
- * This is always called in the context of the task which submitted the I/O.
+ * This function always goes through task_lock() and it's better to use
+ * current_io_context() + get_io_context() for %current.
  */
-struct io_context *get_io_context(gfp_t gfp_flags, int node)
+struct io_context *get_task_io_context(struct task_struct *task,
+				       gfp_t gfp_flags, int node)
 {
-	struct io_context *ioc = NULL;
-
-	/*
-	 * Check for unlikely race with exiting task. ioc ref count is
-	 * zero when ioc is being detached.
-	 */
-	do {
-		ioc = current_io_context(gfp_flags, node);
-		if (unlikely(!ioc))
-			break;
-	} while (!atomic_long_inc_not_zero(&ioc->refcount));
+	struct io_context *ioc;
 
-	return ioc;
+	might_sleep_if(gfp_flags & __GFP_WAIT);
+
+	task_lock(task);
+	ioc = task->io_context;
+	if (likely(ioc)) {
+		get_io_context(ioc);
+		task_unlock(task);
+		return ioc;
+	}
+	task_unlock(task);
+
+	return create_task_io_context(task, gfp_flags, node, true);
 }
-EXPORT_SYMBOL(get_io_context);
+EXPORT_SYMBOL(get_task_io_context);
 
 static int __init blk_ioc_init(void)
 {
diff --git a/block/blk.h b/block/blk.h
index aae4d88fc523..fc3c41b2fd24 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -122,6 +122,7 @@ static inline int blk_should_fake_timeout(struct request_queue *q)
 }
 #endif
 
+void get_io_context(struct io_context *ioc);
 struct io_context *current_io_context(gfp_t gfp_flags, int node);
 
 int ll_back_merge_fn(struct request_queue *q, struct request *req,
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ec3f5e8ba564..d42d89ccce1b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -14,6 +14,7 @@
 #include <linux/rbtree.h>
 #include <linux/ioprio.h>
 #include <linux/blktrace_api.h>
+#include "blk.h"
 #include "cfq.h"
 
 /*
@@ -3194,13 +3195,13 @@ static struct cfq_io_context *
 cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
 	struct io_context *ioc = NULL;
-	struct cfq_io_context *cic;
+	struct cfq_io_context *cic = NULL;
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
-	ioc = get_io_context(gfp_mask, cfqd->queue->node);
+	ioc = current_io_context(gfp_mask, cfqd->queue->node);
 	if (!ioc)
-		return NULL;
+		goto err;
 
 	cic = cfq_cic_lookup(cfqd, ioc);
 	if (cic)
@@ -3211,10 +3212,10 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 		goto err;
 
 	if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
-		goto err_free;
-
+		goto err;
 out:
-	smp_read_barrier_depends();
+	get_io_context(ioc);
+
 	if (unlikely(ioc->ioprio_changed))
 		cfq_ioc_set_ioprio(ioc);
 
@@ -3223,10 +3224,9 @@ out:
 		cfq_ioc_set_cgroup(ioc);
 #endif
 	return cic;
-err_free:
-	cfq_cic_free(cic);
 err:
-	put_io_context(ioc);
+	if (cic)
+		cfq_cic_free(cic);
 	return NULL;
 }
 
diff --git a/fs/ioprio.c b/fs/ioprio.c
index f79dab83e17b..998ec239d1ea 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -48,28 +48,13 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
 	if (err)
 		return err;
 
-	task_lock(task);
-	do {
-		ioc = task->io_context;
-		/* see wmb() in current_io_context() */
-		smp_read_barrier_depends();
-		if (ioc)
-			break;
-
-		ioc = alloc_io_context(GFP_ATOMIC, -1);
-		if (!ioc) {
-			err = -ENOMEM;
-			break;
-		}
-		task->io_context = ioc;
-	} while (1);
-
-	if (!err) {
+	ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
+	if (ioc) {
 		ioc->ioprio = ioprio;
 		ioc->ioprio_changed = 1;
+		put_io_context(ioc);
 	}
 
-	task_unlock(task);
 	return err;
 }
 EXPORT_SYMBOL_GPL(set_task_ioprio);
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 8a6ecb66346f..28bb621ef5a2 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -78,8 +78,8 @@ struct task_struct;
 #ifdef CONFIG_BLOCK
 void put_io_context(struct io_context *ioc);
 void exit_io_context(struct task_struct *task);
-struct io_context *get_io_context(gfp_t gfp_flags, int node);
-struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
+struct io_context *get_task_io_context(struct task_struct *task,
+				       gfp_t gfp_flags, int node);
 #else
 struct io_context;
 static inline void put_io_context(struct io_context *ioc) { }
diff --git a/kernel/fork.c b/kernel/fork.c
index da4a6a10d088..5bcfc739bb7c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -870,6 +870,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 {
 #ifdef CONFIG_BLOCK
 	struct io_context *ioc = current->io_context;
+	struct io_context *new_ioc;
 
 	if (!ioc)
 		return 0;
@@ -881,11 +882,12 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 		if (unlikely(!tsk->io_context))
 			return -ENOMEM;
 	} else if (ioprio_valid(ioc->ioprio)) {
-		tsk->io_context = alloc_io_context(GFP_KERNEL, -1);
-		if (unlikely(!tsk->io_context))
+		new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
+		if (unlikely(!new_ioc))
 			return -ENOMEM;
 
-		tsk->io_context->ioprio = ioc->ioprio;
+		new_ioc->ioprio = ioc->ioprio;
+		put_io_context(new_ioc);
 	}
 #endif
 	return 0;
-- 
cgit v1.2.3


From 09ac46c429464c919d04bb737b27edd84d944f02 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:38 +0100
Subject: block: misc updates to blk_get_queue()

* blk_get_queue() is peculiar in that it returns 0 on success and 1 on
  failure instead of 0 / -errno or boolean.  Update it such that it
  returns %true on success and %false on failure.

* Make sure the caller checks for the return value.

* Separate out __blk_get_queue() which doesn't check whether @q is
  dead and put it in blk.h.  This will be used later.

This patch doesn't introduce any functional changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c         | 8 ++++----
 block/blk.h              | 5 +++++
 block/bsg.c              | 4 +---
 block/genhd.c            | 2 +-
 drivers/scsi/scsi_scan.c | 2 +-
 include/linux/blkdev.h   | 2 +-
 6 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index af7301581172..fd4749391e17 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -626,14 +626,14 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
 }
 EXPORT_SYMBOL(blk_init_allocated_queue_node);
 
-int blk_get_queue(struct request_queue *q)
+bool blk_get_queue(struct request_queue *q)
 {
 	if (likely(!blk_queue_dead(q))) {
-		kobject_get(&q->kobj);
-		return 0;
+		__blk_get_queue(q);
+		return true;
 	}
 
-	return 1;
+	return false;
 }
 EXPORT_SYMBOL(blk_get_queue);
 
diff --git a/block/blk.h b/block/blk.h
index fc3c41b2fd24..8d421156fefb 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -13,6 +13,11 @@ extern struct kmem_cache *blk_requestq_cachep;
 extern struct kobj_type blk_queue_ktype;
 extern struct ida blk_queue_ida;
 
+static inline void __blk_get_queue(struct request_queue *q)
+{
+	kobject_get(&q->kobj);
+}
+
 void init_request_from_bio(struct request *req, struct bio *bio);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 			struct bio *bio);
diff --git a/block/bsg.c b/block/bsg.c
index 702f1316bb8f..167d586cece6 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -769,12 +769,10 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
 					 struct file *file)
 {
 	struct bsg_device *bd;
-	int ret;
 #ifdef BSG_DEBUG
 	unsigned char buf[32];
 #endif
-	ret = blk_get_queue(rq);
-	if (ret)
+	if (!blk_get_queue(rq))
 		return ERR_PTR(-ENXIO);
 
 	bd = bsg_alloc_device();
diff --git a/block/genhd.c b/block/genhd.c
index 02e9fca80825..c958169d24f0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -615,7 +615,7 @@ void add_disk(struct gendisk *disk)
 	 * Take an extra ref on queue which will be put on disk_release()
 	 * so that it sticks around as long as @disk is there.
 	 */
-	WARN_ON_ONCE(blk_get_queue(disk->queue));
+	WARN_ON_ONCE(!blk_get_queue(disk->queue));
 
 	retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
 				   "bdi");
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index b3c6d957fbd8..89da43f73c00 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -297,7 +297,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 		kfree(sdev);
 		goto out;
 	}
-	blk_get_queue(sdev->request_queue);
+	WARN_ON_ONCE(!blk_get_queue(sdev->request_queue));
 	sdev->request_queue->queuedata = sdev;
 	scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8c8dbc4738ea..d1b6f4ed1f96 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -865,7 +865,7 @@ extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatte
 extern void blk_dump_rq_flags(struct request *, char *);
 extern long nr_blockdev_pages(void);
 
-int blk_get_queue(struct request_queue *);
+bool __must_check blk_get_queue(struct request_queue *);
 struct request_queue *blk_alloc_queue(gfp_t);
 struct request_queue *blk_alloc_queue_node(gfp_t, int);
 extern void blk_put_queue(struct request_queue *);
-- 
cgit v1.2.3


From 283287a52e3c3f7f8f9da747f4b8c5202740d776 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:38 +0100
Subject: block, cfq: misc updates to cfq_io_context

Make the following changes to prepare for ioc/cic management cleanup.

* Add cic->q so that ioc can determine the associated queue without
  querying cfq.  This will eventually replace ->key.

* Factor out cfq_release_cic() from cic_free_func().  This function
  assumes that the caller handled locking.

* Rename __cfq_exit_single_io_context() to cfq_exit_cic() and make it
  take only @cic.

* Restructure cfq_cic_link() for future updates.

This patch doesn't introduce any functional changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c       | 58 ++++++++++++++++++++++++++---------------------
 include/linux/iocontext.h |  1 +
 2 files changed, 33 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d42d89ccce1b..a612ca65f371 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2709,21 +2709,26 @@ static void cfq_cic_free(struct cfq_io_context *cic)
 	call_rcu(&cic->rcu_head, cfq_cic_free_rcu);
 }
 
-static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
+static void cfq_release_cic(struct cfq_io_context *cic)
 {
-	unsigned long flags;
+	struct io_context *ioc = cic->ioc;
 	unsigned long dead_key = (unsigned long) cic->key;
 
 	BUG_ON(!(dead_key & CIC_DEAD_KEY));
-
-	spin_lock_irqsave(&ioc->lock, flags);
 	radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT);
 	hlist_del_rcu(&cic->cic_list);
-	spin_unlock_irqrestore(&ioc->lock, flags);
-
 	cfq_cic_free(cic);
 }
 
+static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioc->lock, flags);
+	cfq_release_cic(cic);
+	spin_unlock_irqrestore(&ioc->lock, flags);
+}
+
 /*
  * Must be called with rcu_read_lock() held or preemption otherwise disabled.
  * Only two callers of this - ->dtor() which is called with the rcu_read_lock(),
@@ -2773,9 +2778,9 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	cfq_put_queue(cfqq);
 }
 
-static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
-					 struct cfq_io_context *cic)
+static void cfq_exit_cic(struct cfq_io_context *cic)
 {
+	struct cfq_data *cfqd = cic_to_cfqd(cic);
 	struct io_context *ioc = cic->ioc;
 
 	list_del_init(&cic->queue_list);
@@ -2823,7 +2828,7 @@ static void cfq_exit_single_io_context(struct io_context *ioc,
 		 */
 		smp_read_barrier_depends();
 		if (cic->key == cfqd)
-			__cfq_exit_single_io_context(cfqd, cic);
+			cfq_exit_cic(cic);
 
 		spin_unlock_irqrestore(q->queue_lock, flags);
 	}
@@ -3161,28 +3166,29 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
 	int ret;
 
 	ret = radix_tree_preload(gfp_mask);
-	if (!ret) {
-		cic->ioc = ioc;
-		cic->key = cfqd;
+	if (ret)
+		goto out;
 
-		spin_lock_irqsave(&ioc->lock, flags);
-		ret = radix_tree_insert(&ioc->radix_root, cfqd->queue->id, cic);
-		if (!ret)
-			hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
-		spin_unlock_irqrestore(&ioc->lock, flags);
+	cic->ioc = ioc;
+	cic->key = cfqd;
+	cic->q = cfqd->queue;
+
+	spin_lock_irqsave(&ioc->lock, flags);
+	ret = radix_tree_insert(&ioc->radix_root, cfqd->queue->id, cic);
+	if (!ret)
+		hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
+	spin_unlock_irqrestore(&ioc->lock, flags);
 
-		radix_tree_preload_end();
+	radix_tree_preload_end();
 
-		if (!ret) {
-			spin_lock_irqsave(cfqd->queue->queue_lock, flags);
-			list_add(&cic->queue_list, &cfqd->cic_list);
-			spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
-		}
+	if (!ret) {
+		spin_lock_irqsave(cfqd->queue->queue_lock, flags);
+		list_add(&cic->queue_list, &cfqd->cic_list);
+		spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
 	}
-
+out:
 	if (ret)
 		printk(KERN_ERR "cfq: cic link failed!\n");
-
 	return ret;
 }
 
@@ -3922,7 +3928,7 @@ static void cfq_exit_queue(struct elevator_queue *e)
 							struct cfq_io_context,
 							queue_list);
 
-		__cfq_exit_single_io_context(cfqd, cic);
+		cfq_exit_cic(cic);
 	}
 
 	cfq_put_async_queues(cfqd);
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 28bb621ef5a2..079aea8fd8a8 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -15,6 +15,7 @@ struct cfq_ttime {
 
 struct cfq_io_context {
 	void *key;
+	struct request_queue *q;
 
 	struct cfq_queue *cfqq[2];
 
-- 
cgit v1.2.3


From dc86900e0a8f665122de6faadd27fb4c6d2b3e4d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:38 +0100
Subject: block, cfq: move ioc ioprio/cgroup changed handling to cic

ioprio/cgroup change was handled by marking the changed state in ioc
and, on the following access to the ioc, performing RCU-protected
iteration through all cic's grabbing the matching queue_lock.

This patch moves the changed state to each cic.  When ioprio or cgroup
changes, the respective bit is set on all cic's of the ioc and when
each of those cic (not ioc) is accessed, change is applied for that
specific ioc-queue pair.

This also fixes the following two race conditions between setting and
clearing of changed states.

* Missing barrier between assign/load of ioprio and ioprio_changed
  allowed applying old ioprio.

* Change requests could happen between application of change and
  clearing of changed variables.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c        |  2 +-
 block/blk-ioc.c           | 45 +++++++++++++++++++++++++++++++++++++++++++++
 block/cfq-iosched.c       | 28 +++++++++-------------------
 fs/ioprio.c               |  3 +--
 include/linux/iocontext.h | 14 +++++++++-----
 5 files changed, 65 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 4b001dcd85b0..dc00835aab6a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1648,7 +1648,7 @@ static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	/* we don't lose anything even if ioc allocation fails */
 	ioc = get_task_io_context(tsk, GFP_ATOMIC, NUMA_NO_NODE);
 	if (ioc) {
-		ioc->cgroup_changed = 1;
+		ioc_cgroup_changed(ioc);
 		put_io_context(ioc);
 	}
 }
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index b13ed96776c2..6f59fbad93d9 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -188,6 +188,51 @@ struct io_context *get_task_io_context(struct task_struct *task,
 }
 EXPORT_SYMBOL(get_task_io_context);
 
+void ioc_set_changed(struct io_context *ioc, int which)
+{
+	struct cfq_io_context *cic;
+	struct hlist_node *n;
+
+	hlist_for_each_entry(cic, n, &ioc->cic_list, cic_list)
+		set_bit(which, &cic->changed);
+}
+
+/**
+ * ioc_ioprio_changed - notify ioprio change
+ * @ioc: io_context of interest
+ * @ioprio: new ioprio
+ *
+ * @ioc's ioprio has changed to @ioprio.  Set %CIC_IOPRIO_CHANGED for all
+ * cic's.  iosched is responsible for checking the bit and applying it on
+ * request issue path.
+ */
+void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioc->lock, flags);
+	ioc->ioprio = ioprio;
+	ioc_set_changed(ioc, CIC_IOPRIO_CHANGED);
+	spin_unlock_irqrestore(&ioc->lock, flags);
+}
+
+/**
+ * ioc_cgroup_changed - notify cgroup change
+ * @ioc: io_context of interest
+ *
+ * @ioc's cgroup has changed.  Set %CIC_CGROUP_CHANGED for all cic's.
+ * iosched is responsible for checking the bit and applying it on request
+ * issue path.
+ */
+void ioc_cgroup_changed(struct io_context *ioc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ioc->lock, flags);
+	ioc_set_changed(ioc, CIC_CGROUP_CHANGED);
+	spin_unlock_irqrestore(&ioc->lock, flags);
+}
+
 static int __init blk_ioc_init(void)
 {
 	iocontext_cachep = kmem_cache_create("blkdev_ioc",
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a612ca65f371..51aece2eea7c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2904,7 +2904,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 	cfq_clear_cfqq_prio_changed(cfqq);
 }
 
-static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
+static void changed_ioprio(struct cfq_io_context *cic)
 {
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
 	struct cfq_queue *cfqq;
@@ -2933,12 +2933,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
 	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
 }
 
-static void cfq_ioc_set_ioprio(struct io_context *ioc)
-{
-	call_for_each_cic(ioc, changed_ioprio);
-	ioc->ioprio_changed = 0;
-}
-
 static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 			  pid_t pid, bool is_sync)
 {
@@ -2960,7 +2954,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 }
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
+static void changed_cgroup(struct cfq_io_context *cic)
 {
 	struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
@@ -2986,12 +2980,6 @@ static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
 
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
-
-static void cfq_ioc_set_cgroup(struct io_context *ioc)
-{
-	call_for_each_cic(ioc, changed_cgroup);
-	ioc->cgroup_changed = 0;
-}
 #endif  /* CONFIG_CFQ_GROUP_IOSCHED */
 
 static struct cfq_queue *
@@ -3222,13 +3210,15 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 out:
 	get_io_context(ioc);
 
-	if (unlikely(ioc->ioprio_changed))
-		cfq_ioc_set_ioprio(ioc);
-
+	if (unlikely(cic->changed)) {
+		if (test_and_clear_bit(CIC_IOPRIO_CHANGED, &cic->changed))
+			changed_ioprio(cic);
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-	if (unlikely(ioc->cgroup_changed))
-		cfq_ioc_set_cgroup(ioc);
+		if (test_and_clear_bit(CIC_CGROUP_CHANGED, &cic->changed))
+			changed_cgroup(cic);
 #endif
+	}
+
 	return cic;
 err:
 	if (cic)
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 998ec239d1ea..0f1b9515213b 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -50,8 +50,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
 
 	ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
 	if (ioc) {
-		ioc->ioprio = ioprio;
-		ioc->ioprio_changed = 1;
+		ioc_ioprio_changed(ioc, ioprio);
 		put_io_context(ioc);
 	}
 
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 079aea8fd8a8..2c2b6da96b3c 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -13,6 +13,11 @@ struct cfq_ttime {
 	unsigned long ttime_mean;
 };
 
+enum {
+	CIC_IOPRIO_CHANGED,
+	CIC_CGROUP_CHANGED,
+};
+
 struct cfq_io_context {
 	void *key;
 	struct request_queue *q;
@@ -26,6 +31,8 @@ struct cfq_io_context {
 	struct list_head queue_list;
 	struct hlist_node cic_list;
 
+	unsigned long changed;
+
 	void (*dtor)(struct io_context *); /* destructor */
 	void (*exit)(struct io_context *); /* called on task exit */
 
@@ -44,11 +51,6 @@ struct io_context {
 	spinlock_t lock;
 
 	unsigned short ioprio;
-	unsigned short ioprio_changed;
-
-#if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
-	unsigned short cgroup_changed;
-#endif
 
 	/*
 	 * For request batching
@@ -81,6 +83,8 @@ void put_io_context(struct io_context *ioc);
 void exit_io_context(struct task_struct *task);
 struct io_context *get_task_io_context(struct task_struct *task,
 				       gfp_t gfp_flags, int node);
+void ioc_ioprio_changed(struct io_context *ioc, int ioprio);
+void ioc_cgroup_changed(struct io_context *ioc);
 #else
 struct io_context;
 static inline void put_io_context(struct io_context *ioc) { }
-- 
cgit v1.2.3


From b2efa05265d62bc29f3a64400fad4b44340eedb8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:39 +0100
Subject: block, cfq: unlink cfq_io_context's immediately

cic is association between io_context and request_queue.  A cic is
linked from both ioc and q and should be destroyed when either one
goes away.  As ioc and q both have their own locks, locking becomes a
bit complex - both orders work for removal from one but not from the
other.

Currently, cfq tries to circumvent this locking order issue with RCU.
ioc->lock nests inside queue_lock but the radix tree and cic's are
also protected by RCU allowing either side to walk their lists without
grabbing lock.

This rather unconventional use of RCU quickly devolves into extremely
fragile convolution.  For example, the following oops is from cfqd
going away too soon after ioc and q exits raced.

 general protection fault: 0000 [#1] PREEMPT SMP
 CPU 2
 Modules linked in:
 [   88.503444]
 Pid: 599, comm: hexdump Not tainted 3.1.0-rc10-work+ #158 Bochs Bochs
 RIP: 0010:[<ffffffff81397628>]  [<ffffffff81397628>] cfq_exit_single_io_context+0x58/0xf0
 ...
 Call Trace:
  [<ffffffff81395a4a>] call_for_each_cic+0x5a/0x90
  [<ffffffff81395ab5>] cfq_exit_io_context+0x15/0x20
  [<ffffffff81389130>] exit_io_context+0x100/0x140
  [<ffffffff81098a29>] do_exit+0x579/0x850
  [<ffffffff81098d5b>] do_group_exit+0x5b/0xd0
  [<ffffffff81098de7>] sys_exit_group+0x17/0x20
  [<ffffffff81b02f2b>] system_call_fastpath+0x16/0x1b

The only real hot path here is cic lookup during request
initialization and avoiding extra locking requires very confined use
of RCU.  This patch makes cic removal from both ioc and request_queue
perform double-locking and unlink immediately.

* From q side, the change is almost trivial as ioc->lock nests inside
  queue_lock.  It just needs to grab each ioc->lock as it walks
  cic_list and unlink it.

* From ioc side, it's a bit more difficult because of the inverted lock
  order.  ioc needs its lock to walk its cic_list but can't grab the
  matching queue_lock while holding it, so it needs to perform an
  unlock-relock dance.

  Unlinking is now wholly done from put_io_context() and fast path is
  optimized by using the queue_lock the caller already holds, which is
  by far the most common case.  If the ioc accessed multiple devices,
  it tries with trylock.  In unlikely cases of fast path failure, it
  falls back to full double-locking dance from workqueue.

Double-locking isn't the prettiest thing in the world but it's *far*
simpler and more understandable than RCU trick without adding any
meaningful overhead.

This still leaves a lot of now-unnecessary RCU logic.  Future patches
will trim it.

-v2: Vivek pointed out that cic->q was being dereferenced after
     cic->release() was called.  Updated to use local variable @this_q
     instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c        |   2 +-
 block/blk-ioc.c           | 166 ++++++++++++++++++++++++++++++++++++++--------
 block/cfq-iosched.c       |  44 +++---------
 fs/ioprio.c               |   2 +-
 include/linux/blkdev.h    |   3 +
 include/linux/iocontext.h |  12 ++--
 kernel/fork.c             |   2 +-
 7 files changed, 159 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index dc00835aab6a..278869358049 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1649,7 +1649,7 @@ static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	ioc = get_task_io_context(tsk, GFP_ATOMIC, NUMA_NO_NODE);
 	if (ioc) {
 		ioc_cgroup_changed(ioc);
-		put_io_context(ioc);
+		put_io_context(ioc, NULL);
 	}
 }
 
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 6f59fbad93d9..fb23965595da 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -29,55 +29,164 @@ void get_io_context(struct io_context *ioc)
 }
 EXPORT_SYMBOL(get_io_context);
 
-static void cfq_dtor(struct io_context *ioc)
+/*
+ * Releasing ioc may nest into another put_io_context() leading to nested
+ * fast path release.  As the ioc's can't be the same, this is okay but
+ * makes lockdep whine.  Keep track of nesting and use it as subclass.
+ */
+#ifdef CONFIG_LOCKDEP
+#define ioc_release_depth(q)		((q) ? (q)->ioc_release_depth : 0)
+#define ioc_release_depth_inc(q)	(q)->ioc_release_depth++
+#define ioc_release_depth_dec(q)	(q)->ioc_release_depth--
+#else
+#define ioc_release_depth(q)		0
+#define ioc_release_depth_inc(q)	do { } while (0)
+#define ioc_release_depth_dec(q)	do { } while (0)
+#endif
+
+/*
+ * Slow path for ioc release in put_io_context().  Performs double-lock
+ * dancing to unlink all cic's and then frees ioc.
+ */
+static void ioc_release_fn(struct work_struct *work)
 {
-	if (!hlist_empty(&ioc->cic_list)) {
-		struct cfq_io_context *cic;
+	struct io_context *ioc = container_of(work, struct io_context,
+					      release_work);
+	struct request_queue *last_q = NULL;
+
+	spin_lock_irq(&ioc->lock);
+
+	while (!hlist_empty(&ioc->cic_list)) {
+		struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
+							 struct cfq_io_context,
+							 cic_list);
+		struct request_queue *this_q = cic->q;
+
+		if (this_q != last_q) {
+			/*
+			 * Need to switch to @this_q.  Once we release
+			 * @ioc->lock, it can go away along with @cic.
+			 * Hold on to it.
+			 */
+			__blk_get_queue(this_q);
+
+			/*
+			 * blk_put_queue() might sleep thanks to kobject
+			 * idiocy.  Always release both locks, put and
+			 * restart.
+			 */
+			if (last_q) {
+				spin_unlock(last_q->queue_lock);
+				spin_unlock_irq(&ioc->lock);
+				blk_put_queue(last_q);
+			} else {
+				spin_unlock_irq(&ioc->lock);
+			}
+
+			last_q = this_q;
+			spin_lock_irq(this_q->queue_lock);
+			spin_lock(&ioc->lock);
+			continue;
+		}
+		ioc_release_depth_inc(this_q);
+		cic->exit(cic);
+		cic->release(cic);
+		ioc_release_depth_dec(this_q);
+	}
 
-		cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context,
-								cic_list);
-		cic->dtor(ioc);
+	if (last_q) {
+		spin_unlock(last_q->queue_lock);
+		spin_unlock_irq(&ioc->lock);
+		blk_put_queue(last_q);
+	} else {
+		spin_unlock_irq(&ioc->lock);
 	}
+
+	kmem_cache_free(iocontext_cachep, ioc);
 }
 
 /**
  * put_io_context - put a reference of io_context
  * @ioc: io_context to put
+ * @locked_q: request_queue the caller is holding queue_lock of (hint)
  *
  * Decrement reference count of @ioc and release it if the count reaches
- * zero.
+ * zero.  If the caller is holding queue_lock of a queue, it can indicate
+ * that with @locked_q.  This is an optimization hint and the caller is
+ * allowed to pass in %NULL even when it's holding a queue_lock.
  */
-void put_io_context(struct io_context *ioc)
+void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
 {
+	struct request_queue *last_q = locked_q;
+	unsigned long flags;
+
 	if (ioc == NULL)
 		return;
 
 	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
+	if (locked_q)
+		lockdep_assert_held(locked_q->queue_lock);
 
 	if (!atomic_long_dec_and_test(&ioc->refcount))
 		return;
 
-	rcu_read_lock();
-	cfq_dtor(ioc);
-	rcu_read_unlock();
-
-	kmem_cache_free(iocontext_cachep, ioc);
-}
-EXPORT_SYMBOL(put_io_context);
+	/*
+	 * Destroy @ioc.  This is a bit messy because cic's are chained
+	 * from both ioc and queue, and ioc->lock nests inside queue_lock.
+	 * The inner ioc->lock should be held to walk our cic_list and then
+	 * for each cic the outer matching queue_lock should be grabbed.
+	 * ie. We need to do reverse-order double lock dancing.
+	 *
+	 * Another twist is that we are often called with one of the
+	 * matching queue_locks held as indicated by @locked_q, which
+	 * prevents performing double-lock dance for other queues.
+	 *
+	 * So, we do it in two stages.  The fast path uses the queue_lock
+	 * the caller is holding and, if other queues need to be accessed,
+	 * uses trylock to avoid introducing locking dependency.  This can
+	 * handle most cases, especially if @ioc was performing IO on only
+	 * single device.
+	 *
+	 * If trylock doesn't cut it, we defer to @ioc->release_work which
+	 * can do all the double-locking dancing.
+	 */
+	spin_lock_irqsave_nested(&ioc->lock, flags,
+				 ioc_release_depth(locked_q));
+
+	while (!hlist_empty(&ioc->cic_list)) {
+		struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
+							 struct cfq_io_context,
+							 cic_list);
+		struct request_queue *this_q = cic->q;
+
+		if (this_q != last_q) {
+			if (last_q && last_q != locked_q)
+				spin_unlock(last_q->queue_lock);
+			last_q = NULL;
+
+			if (!spin_trylock(this_q->queue_lock))
+				break;
+			last_q = this_q;
+			continue;
+		}
+		ioc_release_depth_inc(this_q);
+		cic->exit(cic);
+		cic->release(cic);
+		ioc_release_depth_dec(this_q);
+	}
 
-static void cfq_exit(struct io_context *ioc)
-{
-	rcu_read_lock();
+	if (last_q && last_q != locked_q)
+		spin_unlock(last_q->queue_lock);
 
-	if (!hlist_empty(&ioc->cic_list)) {
-		struct cfq_io_context *cic;
+	spin_unlock_irqrestore(&ioc->lock, flags);
 
-		cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context,
-								cic_list);
-		cic->exit(ioc);
-	}
-	rcu_read_unlock();
+	/* if no cic's left, we're done; otherwise, kick release_work */
+	if (hlist_empty(&ioc->cic_list))
+		kmem_cache_free(iocontext_cachep, ioc);
+	else
+		schedule_work(&ioc->release_work);
 }
+EXPORT_SYMBOL(put_io_context);
 
 /* Called by the exiting task */
 void exit_io_context(struct task_struct *task)
@@ -92,10 +201,8 @@ void exit_io_context(struct task_struct *task)
 	task->io_context = NULL;
 	task_unlock(task);
 
-	if (atomic_dec_and_test(&ioc->nr_tasks))
-		cfq_exit(ioc);
-
-	put_io_context(ioc);
+	atomic_dec(&ioc->nr_tasks);
+	put_io_context(ioc, NULL);
 }
 
 static struct io_context *create_task_io_context(struct task_struct *task,
@@ -115,6 +222,7 @@ static struct io_context *create_task_io_context(struct task_struct *task,
 	spin_lock_init(&ioc->lock);
 	INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
 	INIT_HLIST_HEAD(&ioc->cic_list);
+	INIT_WORK(&ioc->release_work, ioc_release_fn);
 
 	/* try to install, somebody might already have beaten us to it */
 	task_lock(task);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e617b088c59b..6cc606560402 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1778,7 +1778,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		cfqd->active_queue = NULL;
 
 	if (cfqd->active_cic) {
-		put_io_context(cfqd->active_cic->ioc);
+		put_io_context(cfqd->active_cic->ioc, cfqd->queue);
 		cfqd->active_cic = NULL;
 	}
 }
@@ -2812,38 +2812,6 @@ static void cfq_exit_cic(struct cfq_io_context *cic)
 	}
 }
 
-static void cfq_exit_single_io_context(struct io_context *ioc,
-				       struct cfq_io_context *cic)
-{
-	struct cfq_data *cfqd = cic_to_cfqd(cic);
-
-	if (cfqd) {
-		struct request_queue *q = cfqd->queue;
-		unsigned long flags;
-
-		spin_lock_irqsave(q->queue_lock, flags);
-
-		/*
-		 * Ensure we get a fresh copy of the ->key to prevent
-		 * race between exiting task and queue
-		 */
-		smp_read_barrier_depends();
-		if (cic->key == cfqd)
-			cfq_exit_cic(cic);
-
-		spin_unlock_irqrestore(q->queue_lock, flags);
-	}
-}
-
-/*
- * The process that ioc belongs to has exited, we need to clean up
- * and put the internal structures we have that belongs to that process.
- */
-static void cfq_exit_io_context(struct io_context *ioc)
-{
-	call_for_each_cic(ioc, cfq_exit_single_io_context);
-}
-
 static struct cfq_io_context *
 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
@@ -2855,8 +2823,8 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 		cic->ttime.last_end_request = jiffies;
 		INIT_LIST_HEAD(&cic->queue_list);
 		INIT_HLIST_NODE(&cic->cic_list);
-		cic->dtor = cfq_free_io_context;
-		cic->exit = cfq_exit_io_context;
+		cic->exit = cfq_exit_cic;
+		cic->release = cfq_release_cic;
 		elv_ioc_count_inc(cfq_ioc_count);
 	}
 
@@ -3726,7 +3694,7 @@ static void cfq_put_request(struct request *rq)
 		BUG_ON(!cfqq->allocated[rw]);
 		cfqq->allocated[rw]--;
 
-		put_io_context(RQ_CIC(rq)->ioc);
+		put_io_context(RQ_CIC(rq)->ioc, cfqq->cfqd->queue);
 
 		rq->elevator_private[0] = NULL;
 		rq->elevator_private[1] = NULL;
@@ -3937,8 +3905,12 @@ static void cfq_exit_queue(struct elevator_queue *e)
 		struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
 							struct cfq_io_context,
 							queue_list);
+		struct io_context *ioc = cic->ioc;
 
+		spin_lock(&ioc->lock);
 		cfq_exit_cic(cic);
+		cfq_release_cic(cic);
+		spin_unlock(&ioc->lock);
 	}
 
 	cfq_put_async_queues(cfqd);
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 0f1b9515213b..f84b380d65e5 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
 	ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
 	if (ioc) {
 		ioc_ioprio_changed(ioc, ioprio);
-		put_io_context(ioc);
+		put_io_context(ioc, NULL);
 	}
 
 	return err;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d1b6f4ed1f96..65c2f8c70089 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -393,6 +393,9 @@ struct request_queue {
 	/* Throttle data */
 	struct throtl_data *td;
 #endif
+#ifdef CONFIG_LOCKDEP
+	int			ioc_release_depth;
+#endif
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 2c2b6da96b3c..01e863128780 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -3,6 +3,7 @@
 
 #include <linux/radix-tree.h>
 #include <linux/rcupdate.h>
+#include <linux/workqueue.h>
 
 struct cfq_queue;
 struct cfq_ttime {
@@ -33,8 +34,8 @@ struct cfq_io_context {
 
 	unsigned long changed;
 
-	void (*dtor)(struct io_context *); /* destructor */
-	void (*exit)(struct io_context *); /* called on task exit */
+	void (*exit)(struct cfq_io_context *);
+	void (*release)(struct cfq_io_context *);
 
 	struct rcu_head rcu_head;
 };
@@ -61,6 +62,8 @@ struct io_context {
 	struct radix_tree_root radix_root;
 	struct hlist_head cic_list;
 	void __rcu *ioc_data;
+
+	struct work_struct release_work;
 };
 
 static inline struct io_context *ioc_task_link(struct io_context *ioc)
@@ -79,7 +82,7 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
 
 struct task_struct;
 #ifdef CONFIG_BLOCK
-void put_io_context(struct io_context *ioc);
+void put_io_context(struct io_context *ioc, struct request_queue *locked_q);
 void exit_io_context(struct task_struct *task);
 struct io_context *get_task_io_context(struct task_struct *task,
 				       gfp_t gfp_flags, int node);
@@ -87,7 +90,8 @@ void ioc_ioprio_changed(struct io_context *ioc, int ioprio);
 void ioc_cgroup_changed(struct io_context *ioc);
 #else
 struct io_context;
-static inline void put_io_context(struct io_context *ioc) { }
+static inline void put_io_context(struct io_context *ioc,
+				  struct request_queue *locked_q) { }
 static inline void exit_io_context(struct task_struct *task) { }
 #endif
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 5bcfc739bb7c..2753449f2038 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -887,7 +887,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 			return -ENOMEM;
 
 		new_ioc->ioprio = ioc->ioprio;
-		put_io_context(new_ioc);
+		put_io_context(new_ioc, NULL);
 	}
 #endif
 	return 0;
-- 
cgit v1.2.3


From b9a1920837bc53430d339380e393a6e4c372939f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:39 +0100
Subject: block, cfq: remove delayed unlink

Now that all cic's are immediately unlinked from both ioc and queue,
lazy dropping from lookup path and trimming on elevator unregister are
unnecessary.  Kill them and remove now unused elevator_ops->trim().

This also leaves call_for_each_cic() without any user.  Removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c      | 92 ++++++------------------------------------------
 block/elevator.c         | 16 ---------
 include/linux/elevator.h |  1 -
 3 files changed, 10 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6cc606560402..ff44435fad50 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2669,24 +2669,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	cfq_put_cfqg(cfqg);
 }
 
-/*
- * Call func for each cic attached to this ioc.
- */
-static void
-call_for_each_cic(struct io_context *ioc,
-		  void (*func)(struct io_context *, struct cfq_io_context *))
-{
-	struct cfq_io_context *cic;
-	struct hlist_node *n;
-
-	rcu_read_lock();
-
-	hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
-		func(ioc, cic);
-
-	rcu_read_unlock();
-}
-
 static void cfq_cic_free_rcu(struct rcu_head *head)
 {
 	struct cfq_io_context *cic;
@@ -2727,31 +2709,6 @@ static void cfq_release_cic(struct cfq_io_context *cic)
 	cfq_cic_free(cic);
 }
 
-static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ioc->lock, flags);
-	cfq_release_cic(cic);
-	spin_unlock_irqrestore(&ioc->lock, flags);
-}
-
-/*
- * Must be called with rcu_read_lock() held or preemption otherwise disabled.
- * Only two callers of this - ->dtor() which is called with the rcu_read_lock(),
- * and ->trim() which is called with the task lock held
- */
-static void cfq_free_io_context(struct io_context *ioc)
-{
-	/*
-	 * ioc->refcount is zero here, or we are called from elv_unregister(),
-	 * so no more cic's are allowed to be linked into this ioc.  So it
-	 * should be ok to iterate over the known list, we will see all cic's
-	 * since no new ones are added.
-	 */
-	call_for_each_cic(ioc, cic_free_func);
-}
-
 static void cfq_put_cooperator(struct cfq_queue *cfqq)
 {
 	struct cfq_queue *__cfqq, *next;
@@ -3037,30 +2994,6 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
 	return cfqq;
 }
 
-/*
- * We drop cfq io contexts lazily, so we may find a dead one.
- */
-static void
-cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
-		  struct cfq_io_context *cic)
-{
-	unsigned long flags;
-
-	WARN_ON(!list_empty(&cic->queue_list));
-	BUG_ON(cic->key != cfqd_dead_key(cfqd));
-
-	spin_lock_irqsave(&ioc->lock, flags);
-
-	BUG_ON(rcu_dereference_check(ioc->ioc_data,
-		lockdep_is_held(&ioc->lock)) == cic);
-
-	radix_tree_delete(&ioc->radix_root, cfqd->queue->id);
-	hlist_del_rcu(&cic->cic_list);
-	spin_unlock_irqrestore(&ioc->lock, flags);
-
-	cfq_cic_free(cic);
-}
-
 /**
  * cfq_cic_lookup - lookup cfq_io_context
  * @cfqd: the associated cfq_data
@@ -3078,26 +3011,22 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 	if (unlikely(!ioc))
 		return NULL;
 
-	rcu_read_lock();
-
 	/*
-	 * we maintain a last-hit cache, to avoid browsing over the tree
+	 * cic's are indexed from @ioc using radix tree and hint pointer,
+	 * both of which are protected with RCU.  All removals are done
+	 * holding both q and ioc locks, and we're holding q lock - if we
+	 * find a cic which points to us, it's guaranteed to be valid.
 	 */
+	rcu_read_lock();
 	cic = rcu_dereference(ioc->ioc_data);
 	if (cic && cic->key == cfqd)
 		goto out;
 
-	do {
-		cic = radix_tree_lookup(&ioc->radix_root, cfqd->queue->id);
-		if (!cic)
-			break;
-		if (likely(cic->key == cfqd)) {
-			/* hint assignment itself can race safely */
-			rcu_assign_pointer(ioc->ioc_data, cic);
-			break;
-		}
-		cfq_drop_dead_cic(cfqd, ioc, cic);
-	} while (1);
+	cic = radix_tree_lookup(&ioc->radix_root, cfqd->queue->id);
+	if (cic && cic->key == cfqd)
+		rcu_assign_pointer(ioc->ioc_data, cic);	/* allowed to race */
+	else
+		cic = NULL;
 out:
 	rcu_read_unlock();
 	return cic;
@@ -4182,7 +4111,6 @@ static struct elevator_type iosched_cfq = {
 		.elevator_may_queue_fn =	cfq_may_queue,
 		.elevator_init_fn =		cfq_init_queue,
 		.elevator_exit_fn =		cfq_exit_queue,
-		.trim =				cfq_free_io_context,
 	},
 	.elevator_attrs =	cfq_attrs,
 	.elevator_name =	"cfq",
diff --git a/block/elevator.c b/block/elevator.c
index 66343d6917d0..6a343e8f8319 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -913,22 +913,6 @@ EXPORT_SYMBOL_GPL(elv_register);
 
 void elv_unregister(struct elevator_type *e)
 {
-	struct task_struct *g, *p;
-
-	/*
-	 * Iterate every thread in the process to remove the io contexts.
-	 */
-	if (e->ops.trim) {
-		read_lock(&tasklist_lock);
-		do_each_thread(g, p) {
-			task_lock(p);
-			if (p->io_context)
-				e->ops.trim(p->io_context);
-			task_unlock(p);
-		} while_each_thread(g, p);
-		read_unlock(&tasklist_lock);
-	}
-
 	spin_lock(&elv_list_lock);
 	list_del_init(&e->list);
 	spin_unlock(&elv_list_lock);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 1d0f7a2ff73b..581dd1bd3d3e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -63,7 +63,6 @@ struct elevator_ops
 
 	elevator_init_fn *elevator_init_fn;
 	elevator_exit_fn *elevator_exit_fn;
-	void (*trim)(struct io_context *);
 };
 
 #define ELV_NAME_MAX	(16)
-- 
cgit v1.2.3


From b50b636bce6293fa858cc7ff6c3ffe4920d90006 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:39 +0100
Subject: block, cfq: kill ioc_gone

Now that cic's are immediately unlinked under both locks, there's no
need to count and drain cic's before module unload.  RCU callback
completion is waited for with rcu_barrier().

While at it, remove residual RCU operations on cic_list.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c      | 43 +++++--------------------------------------
 include/linux/elevator.h | 17 -----------------
 2 files changed, 5 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ff44435fad50..ae7791a8ded9 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -62,10 +62,6 @@ static const int cfq_hist_divisor = 4;
 static struct kmem_cache *cfq_pool;
 static struct kmem_cache *cfq_ioc_pool;
 
-static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
-static struct completion *ioc_gone;
-static DEFINE_SPINLOCK(ioc_gone_lock);
-
 #define CFQ_PRIO_LISTS		IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
 #define cfq_class_rt(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
@@ -2671,26 +2667,8 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 
 static void cfq_cic_free_rcu(struct rcu_head *head)
 {
-	struct cfq_io_context *cic;
-
-	cic = container_of(head, struct cfq_io_context, rcu_head);
-
-	kmem_cache_free(cfq_ioc_pool, cic);
-	elv_ioc_count_dec(cfq_ioc_count);
-
-	if (ioc_gone) {
-		/*
-		 * CFQ scheduler is exiting, grab exit lock and check
-		 * the pending io context count. If it hits zero,
-		 * complete ioc_gone and set it back to NULL
-		 */
-		spin_lock(&ioc_gone_lock);
-		if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) {
-			complete(ioc_gone);
-			ioc_gone = NULL;
-		}
-		spin_unlock(&ioc_gone_lock);
-	}
+	kmem_cache_free(cfq_ioc_pool,
+			container_of(head, struct cfq_io_context, rcu_head));
 }
 
 static void cfq_cic_free(struct cfq_io_context *cic)
@@ -2705,7 +2683,7 @@ static void cfq_release_cic(struct cfq_io_context *cic)
 
 	BUG_ON(!(dead_key & CIC_DEAD_KEY));
 	radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT);
-	hlist_del_rcu(&cic->cic_list);
+	hlist_del(&cic->cic_list);
 	cfq_cic_free(cic);
 }
 
@@ -2782,7 +2760,6 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 		INIT_HLIST_NODE(&cic->cic_list);
 		cic->exit = cfq_exit_cic;
 		cic->release = cfq_release_cic;
-		elv_ioc_count_inc(cfq_ioc_count);
 	}
 
 	return cic;
@@ -3072,7 +3049,7 @@ static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 
 	ret = radix_tree_insert(&ioc->radix_root, q->id, cic);
 	if (likely(!ret)) {
-		hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
+		hlist_add_head(&cic->cic_list, &ioc->cic_list);
 		list_add(&cic->queue_list, &cfqd->cic_list);
 		cic = NULL;
 	} else if (ret == -EEXIST) {
@@ -4156,19 +4133,9 @@ static int __init cfq_init(void)
 
 static void __exit cfq_exit(void)
 {
-	DECLARE_COMPLETION_ONSTACK(all_gone);
 	blkio_policy_unregister(&blkio_policy_cfq);
 	elv_unregister(&iosched_cfq);
-	ioc_gone = &all_gone;
-	/* ioc_gone's update must be visible before reading ioc_count */
-	smp_wmb();
-
-	/*
-	 * this also protects us from entering cfq_slab_kill() with
-	 * pending RCU callbacks
-	 */
-	if (elv_ioc_count_read(cfq_ioc_count))
-		wait_for_completion(&all_gone);
+	rcu_barrier();	/* make sure all cic RCU frees are complete */
 	cfq_slab_kill();
 }
 
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 581dd1bd3d3e..02604c89ddde 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -196,22 +196,5 @@ enum {
 	INIT_LIST_HEAD(&(rq)->csd.list);	\
 	} while (0)
 
-/*
- * io context count accounting
- */
-#define elv_ioc_count_mod(name, __val) this_cpu_add(name, __val)
-#define elv_ioc_count_inc(name)	this_cpu_inc(name)
-#define elv_ioc_count_dec(name)	this_cpu_dec(name)
-
-#define elv_ioc_count_read(name)				\
-({								\
-	unsigned long __val = 0;				\
-	int __cpu;						\
-	smp_wmb();						\
-	for_each_possible_cpu(__cpu)				\
-		__val += per_cpu(name, __cpu);			\
-	__val;							\
-})
-
 #endif /* CONFIG_BLOCK */
 #endif
-- 
cgit v1.2.3


From 1238033c79e92e5c315af12e45396f1a78c73dec Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:40 +0100
Subject: block, cfq: kill cic->key

Now that lazy paths are removed, cfqd_dead_key() is meaningless and
cic->q can be used wherever cic->key is used.  Kill cic->key.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c       | 26 +++++---------------------
 include/linux/iocontext.h |  1 -
 2 files changed, 5 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ae7791a8ded9..3b07ce168780 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -472,22 +472,9 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
 	cic->cfqq[is_sync] = cfqq;
 }
 
-#define CIC_DEAD_KEY	1ul
-#define CIC_DEAD_INDEX_SHIFT	1
-
-static inline void *cfqd_dead_key(struct cfq_data *cfqd)
-{
-	return (void *)(cfqd->queue->id << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
-}
-
 static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
 {
-	struct cfq_data *cfqd = cic->key;
-
-	if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY))
-		return NULL;
-
-	return cfqd;
+	return cic->q->elevator->elevator_data;
 }
 
 /*
@@ -2679,10 +2666,8 @@ static void cfq_cic_free(struct cfq_io_context *cic)
 static void cfq_release_cic(struct cfq_io_context *cic)
 {
 	struct io_context *ioc = cic->ioc;
-	unsigned long dead_key = (unsigned long) cic->key;
 
-	BUG_ON(!(dead_key & CIC_DEAD_KEY));
-	radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT);
+	radix_tree_delete(&ioc->radix_root, cic->q->id);
 	hlist_del(&cic->cic_list);
 	cfq_cic_free(cic);
 }
@@ -2726,7 +2711,6 @@ static void cfq_exit_cic(struct cfq_io_context *cic)
 	struct io_context *ioc = cic->ioc;
 
 	list_del_init(&cic->queue_list);
-	cic->key = cfqd_dead_key(cfqd);
 
 	/*
 	 * Both setting lookup hint to and clearing it from @cic are done
@@ -2982,6 +2966,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
 static struct cfq_io_context *
 cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 {
+	struct request_queue *q = cfqd->queue;
 	struct cfq_io_context *cic;
 
 	lockdep_assert_held(cfqd->queue->queue_lock);
@@ -2996,11 +2981,11 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 	 */
 	rcu_read_lock();
 	cic = rcu_dereference(ioc->ioc_data);
-	if (cic && cic->key == cfqd)
+	if (cic && cic->q == q)
 		goto out;
 
 	cic = radix_tree_lookup(&ioc->radix_root, cfqd->queue->id);
-	if (cic && cic->key == cfqd)
+	if (cic && cic->q == q)
 		rcu_assign_pointer(ioc->ioc_data, cic);	/* allowed to race */
 	else
 		cic = NULL;
@@ -3040,7 +3025,6 @@ static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 		goto out;
 
 	cic->ioc = ioc;
-	cic->key = cfqd;
 	cic->q = cfqd->queue;
 
 	/* lock both q and ioc and try to link @cic */
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 01e863128780..b2b75a54f252 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -20,7 +20,6 @@ enum {
 };
 
 struct cfq_io_context {
-	void *key;
 	struct request_queue *q;
 
 	struct cfq_queue *cfqq[2];
-- 
cgit v1.2.3


From 22f746e235a5cbee2a6ca9887b1be2aa7d31fe71 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:41 +0100
Subject: block: remove elevator_queue->ops

elevator_queue->ops points to the same ops struct ->elevator_type.ops
is pointing to.  The only effect of caching it in elevator_queue is
shorter notation - it doesn't save any indirect dereference.

Relocate elevator_type->list, which is used only during module init/exit,
to the end of the structure, rename elevator_queue->elevator_type to
->type, and replace elevator_queue->ops with elevator_queue->type->ops.

This doesn't introduce any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk.h              | 10 +++----
 block/elevator.c         | 74 +++++++++++++++++++++++-------------------------
 include/linux/elevator.h |  5 ++--
 3 files changed, 43 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk.h b/block/blk.h
index 5bca2668e1bf..4943770e0792 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -94,7 +94,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 			return NULL;
 		}
 		if (unlikely(blk_queue_dead(q)) ||
-		    !q->elevator->ops->elevator_dispatch_fn(q, 0))
+		    !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
 			return NULL;
 	}
 }
@@ -103,16 +103,16 @@ static inline void elv_activate_rq(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_activate_req_fn)
-		e->ops->elevator_activate_req_fn(q, rq);
+	if (e->type->ops.elevator_activate_req_fn)
+		e->type->ops.elevator_activate_req_fn(q, rq);
 }
 
 static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_deactivate_req_fn)
-		e->ops->elevator_deactivate_req_fn(q, rq);
+	if (e->type->ops.elevator_deactivate_req_fn)
+		e->type->ops.elevator_deactivate_req_fn(q, rq);
 }
 
 #ifdef CONFIG_FAIL_IO_TIMEOUT
diff --git a/block/elevator.c b/block/elevator.c
index a16c2d1713e5..31ffe76aed3d 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -61,8 +61,8 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
 	struct request_queue *q = rq->q;
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_allow_merge_fn)
-		return e->ops->elevator_allow_merge_fn(q, rq, bio);
+	if (e->type->ops.elevator_allow_merge_fn)
+		return e->type->ops.elevator_allow_merge_fn(q, rq, bio);
 
 	return 1;
 }
@@ -171,7 +171,7 @@ static struct elevator_type *elevator_get(const char *name)
 static int elevator_init_queue(struct request_queue *q,
 			       struct elevator_queue *eq)
 {
-	eq->elevator_data = eq->ops->elevator_init_fn(q);
+	eq->elevator_data = eq->type->ops.elevator_init_fn(q);
 	if (eq->elevator_data)
 		return 0;
 	return -ENOMEM;
@@ -203,8 +203,7 @@ static struct elevator_queue *elevator_alloc(struct request_queue *q,
 	if (unlikely(!eq))
 		goto err;
 
-	eq->ops = &e->ops;
-	eq->elevator_type = e;
+	eq->type = e;
 	kobject_init(&eq->kobj, &elv_ktype);
 	mutex_init(&eq->sysfs_lock);
 
@@ -228,7 +227,7 @@ static void elevator_release(struct kobject *kobj)
 	struct elevator_queue *e;
 
 	e = container_of(kobj, struct elevator_queue, kobj);
-	elevator_put(e->elevator_type);
+	elevator_put(e->type);
 	kfree(e->hash);
 	kfree(e);
 }
@@ -288,9 +287,8 @@ EXPORT_SYMBOL(elevator_init);
 void elevator_exit(struct elevator_queue *e)
 {
 	mutex_lock(&e->sysfs_lock);
-	if (e->ops->elevator_exit_fn)
-		e->ops->elevator_exit_fn(e);
-	e->ops = NULL;
+	if (e->type->ops.elevator_exit_fn)
+		e->type->ops.elevator_exit_fn(e);
 	mutex_unlock(&e->sysfs_lock);
 
 	kobject_put(&e->kobj);
@@ -500,8 +498,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 		return ELEVATOR_BACK_MERGE;
 	}
 
-	if (e->ops->elevator_merge_fn)
-		return e->ops->elevator_merge_fn(q, req, bio);
+	if (e->type->ops.elevator_merge_fn)
+		return e->type->ops.elevator_merge_fn(q, req, bio);
 
 	return ELEVATOR_NO_MERGE;
 }
@@ -544,8 +542,8 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_merged_fn)
-		e->ops->elevator_merged_fn(q, rq, type);
+	if (e->type->ops.elevator_merged_fn)
+		e->type->ops.elevator_merged_fn(q, rq, type);
 
 	if (type == ELEVATOR_BACK_MERGE)
 		elv_rqhash_reposition(q, rq);
@@ -559,8 +557,8 @@ void elv_merge_requests(struct request_queue *q, struct request *rq,
 	struct elevator_queue *e = q->elevator;
 	const int next_sorted = next->cmd_flags & REQ_SORTED;
 
-	if (next_sorted && e->ops->elevator_merge_req_fn)
-		e->ops->elevator_merge_req_fn(q, rq, next);
+	if (next_sorted && e->type->ops.elevator_merge_req_fn)
+		e->type->ops.elevator_merge_req_fn(q, rq, next);
 
 	elv_rqhash_reposition(q, rq);
 
@@ -577,8 +575,8 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_bio_merged_fn)
-		e->ops->elevator_bio_merged_fn(q, rq, bio);
+	if (e->type->ops.elevator_bio_merged_fn)
+		e->type->ops.elevator_bio_merged_fn(q, rq, bio);
 }
 
 void elv_requeue_request(struct request_queue *q, struct request *rq)
@@ -604,12 +602,12 @@ void elv_drain_elevator(struct request_queue *q)
 
 	lockdep_assert_held(q->queue_lock);
 
-	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
+	while (q->elevator->type->ops.elevator_dispatch_fn(q, 1))
 		;
 	if (q->nr_sorted && printed++ < 10) {
 		printk(KERN_ERR "%s: forced dispatching is broken "
 		       "(nr_sorted=%u), please report this\n",
-		       q->elevator->elevator_type->elevator_name, q->nr_sorted);
+		       q->elevator->type->elevator_name, q->nr_sorted);
 	}
 }
 
@@ -698,7 +696,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 		 * rq cannot be accessed after calling
 		 * elevator_add_req_fn.
 		 */
-		q->elevator->ops->elevator_add_req_fn(q, rq);
+		q->elevator->type->ops.elevator_add_req_fn(q, rq);
 		break;
 
 	case ELEVATOR_INSERT_FLUSH:
@@ -727,8 +725,8 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_latter_req_fn)
-		return e->ops->elevator_latter_req_fn(q, rq);
+	if (e->type->ops.elevator_latter_req_fn)
+		return e->type->ops.elevator_latter_req_fn(q, rq);
 	return NULL;
 }
 
@@ -736,8 +734,8 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_former_req_fn)
-		return e->ops->elevator_former_req_fn(q, rq);
+	if (e->type->ops.elevator_former_req_fn)
+		return e->type->ops.elevator_former_req_fn(q, rq);
 	return NULL;
 }
 
@@ -745,8 +743,8 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_set_req_fn)
-		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
+	if (e->type->ops.elevator_set_req_fn)
+		return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask);
 
 	rq->elevator_private[0] = NULL;
 	return 0;
@@ -756,16 +754,16 @@ void elv_put_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_put_req_fn)
-		e->ops->elevator_put_req_fn(rq);
+	if (e->type->ops.elevator_put_req_fn)
+		e->type->ops.elevator_put_req_fn(rq);
 }
 
 int elv_may_queue(struct request_queue *q, int rw)
 {
 	struct elevator_queue *e = q->elevator;
 
-	if (e->ops->elevator_may_queue_fn)
-		return e->ops->elevator_may_queue_fn(q, rw);
+	if (e->type->ops.elevator_may_queue_fn)
+		return e->type->ops.elevator_may_queue_fn(q, rw);
 
 	return ELV_MQUEUE_MAY;
 }
@@ -800,8 +798,8 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 	if (blk_account_rq(rq)) {
 		q->in_flight[rq_is_sync(rq)]--;
 		if ((rq->cmd_flags & REQ_SORTED) &&
-		    e->ops->elevator_completed_req_fn)
-			e->ops->elevator_completed_req_fn(q, rq);
+		    e->type->ops.elevator_completed_req_fn)
+			e->type->ops.elevator_completed_req_fn(q, rq);
 	}
 }
 
@@ -819,7 +817,7 @@ elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 
 	e = container_of(kobj, struct elevator_queue, kobj);
 	mutex_lock(&e->sysfs_lock);
-	error = e->ops ? entry->show(e, page) : -ENOENT;
+	error = e->type ? entry->show(e, page) : -ENOENT;
 	mutex_unlock(&e->sysfs_lock);
 	return error;
 }
@@ -837,7 +835,7 @@ elv_attr_store(struct kobject *kobj, struct attribute *attr,
 
 	e = container_of(kobj, struct elevator_queue, kobj);
 	mutex_lock(&e->sysfs_lock);
-	error = e->ops ? entry->store(e, page, length) : -ENOENT;
+	error = e->type ? entry->store(e, page, length) : -ENOENT;
 	mutex_unlock(&e->sysfs_lock);
 	return error;
 }
@@ -858,7 +856,7 @@ int __elv_register_queue(struct request_queue *q, struct elevator_queue *e)
 
 	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
 	if (!error) {
-		struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;
+		struct elv_fs_entry *attr = e->type->elevator_attrs;
 		if (attr) {
 			while (attr->attr.name) {
 				if (sysfs_create_file(&e->kobj, &attr->attr))
@@ -959,7 +957,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	elevator_exit(old_elevator);
 	elv_quiesce_end(q);
 
-	blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
+	blk_add_trace_msg(q, "elv switch: %s", e->type->elevator_name);
 
 	return 0;
 
@@ -993,7 +991,7 @@ int elevator_change(struct request_queue *q, const char *name)
 		return -EINVAL;
 	}
 
-	if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
+	if (!strcmp(elevator_name, q->elevator->type->elevator_name)) {
 		elevator_put(e);
 		return 0;
 	}
@@ -1028,7 +1026,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name)
 	if (!q->elevator || !blk_queue_stackable(q))
 		return sprintf(name, "none\n");
 
-	elv = e->elevator_type;
+	elv = e->type;
 
 	spin_lock(&elv_list_lock);
 	list_for_each_entry(__e, &elv_list, list) {
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 02604c89ddde..04958ef53e62 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -78,11 +78,11 @@ struct elv_fs_entry {
  */
 struct elevator_type
 {
-	struct list_head list;
 	struct elevator_ops ops;
 	struct elv_fs_entry *elevator_attrs;
 	char elevator_name[ELV_NAME_MAX];
 	struct module *elevator_owner;
+	struct list_head list;
 };
 
 /*
@@ -90,10 +90,9 @@ struct elevator_type
  */
 struct elevator_queue
 {
-	struct elevator_ops *ops;
+	struct elevator_type *type;
 	void *elevator_data;
 	struct kobject kobj;
-	struct elevator_type *elevator_type;
 	struct mutex sysfs_lock;
 	struct hlist_head *hash;
 	unsigned int registered:1;
-- 
cgit v1.2.3


From c58698073218f2c8f2fc5982fa3938c2d3803b9f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:41 +0100
Subject: block, cfq: reorganize cfq_io_context into generic and cfq specific
 parts

Currently io_context and cfq logic are mixed without a clear boundary.
Most of io_context is independent from cfq but cfq_io_context handling
logic is dispersed between generic ioc code and cfq.

cfq_io_context represents association between an io_context and a
request_queue, which is a concept useful outside of cfq, but it also
contains fields which are useful only to cfq.

This patch takes out the generic part and puts it into io_cq (io
context-queue) and the rest into cfq_io_cq (cic moniker remains the
same) which contains io_cq.  The following changes are made together.

* cfq_ttime and cfq_io_cq now live in cfq-iosched.c.

* All related fields, functions and constants are renamed accordingly.

* ioc->ioc_data is now "struct io_cq *" instead of "void *" and
  renamed to icq_hint.

This prepares for io_context API cleanup.  Documentation is currently
sparse.  It will be added later.

Changes in this patch are mechanical and don't cause functional
change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-ioc.c           |  58 ++++++-----
 block/cfq-iosched.c       | 248 +++++++++++++++++++++++++---------------------
 include/linux/iocontext.h |  43 +++-----
 3 files changed, 175 insertions(+), 174 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index e23c797b4685..dc5e69d335a0 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -46,7 +46,7 @@ EXPORT_SYMBOL(get_io_context);
 
 /*
  * Slow path for ioc release in put_io_context().  Performs double-lock
- * dancing to unlink all cic's and then frees ioc.
+ * dancing to unlink all icq's and then frees ioc.
  */
 static void ioc_release_fn(struct work_struct *work)
 {
@@ -56,11 +56,10 @@ static void ioc_release_fn(struct work_struct *work)
 
 	spin_lock_irq(&ioc->lock);
 
-	while (!hlist_empty(&ioc->cic_list)) {
-		struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
-							 struct cfq_io_context,
-							 cic_list);
-		struct request_queue *this_q = cic->q;
+	while (!hlist_empty(&ioc->icq_list)) {
+		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
+						struct io_cq, ioc_node);
+		struct request_queue *this_q = icq->q;
 
 		if (this_q != last_q) {
 			/*
@@ -89,8 +88,8 @@ static void ioc_release_fn(struct work_struct *work)
 			continue;
 		}
 		ioc_release_depth_inc(this_q);
-		cic->exit(cic);
-		cic->release(cic);
+		icq->exit(icq);
+		icq->release(icq);
 		ioc_release_depth_dec(this_q);
 	}
 
@@ -131,10 +130,10 @@ void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
 		return;
 
 	/*
-	 * Destroy @ioc.  This is a bit messy because cic's are chained
+	 * Destroy @ioc.  This is a bit messy because icq's are chained
 	 * from both ioc and queue, and ioc->lock nests inside queue_lock.
-	 * The inner ioc->lock should be held to walk our cic_list and then
-	 * for each cic the outer matching queue_lock should be grabbed.
+	 * The inner ioc->lock should be held to walk our icq_list and then
+	 * for each icq the outer matching queue_lock should be grabbed.
 	 * ie. We need to do reverse-order double lock dancing.
 	 *
 	 * Another twist is that we are often called with one of the
@@ -153,11 +152,10 @@ void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
 	spin_lock_irqsave_nested(&ioc->lock, flags,
 				 ioc_release_depth(locked_q));
 
-	while (!hlist_empty(&ioc->cic_list)) {
-		struct cfq_io_context *cic = hlist_entry(ioc->cic_list.first,
-							 struct cfq_io_context,
-							 cic_list);
-		struct request_queue *this_q = cic->q;
+	while (!hlist_empty(&ioc->icq_list)) {
+		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
+						struct io_cq, ioc_node);
+		struct request_queue *this_q = icq->q;
 
 		if (this_q != last_q) {
 			if (last_q && last_q != locked_q)
@@ -170,8 +168,8 @@ void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
 			continue;
 		}
 		ioc_release_depth_inc(this_q);
-		cic->exit(cic);
-		cic->release(cic);
+		icq->exit(icq);
+		icq->release(icq);
 		ioc_release_depth_dec(this_q);
 	}
 
@@ -180,8 +178,8 @@ void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
 
 	spin_unlock_irqrestore(&ioc->lock, flags);
 
-	/* if no cic's left, we're done; otherwise, kick release_work */
-	if (hlist_empty(&ioc->cic_list))
+	/* if no icq is left, we're done; otherwise, kick release_work */
+	if (hlist_empty(&ioc->icq_list))
 		kmem_cache_free(iocontext_cachep, ioc);
 	else
 		schedule_work(&ioc->release_work);
@@ -219,8 +217,8 @@ void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
 	atomic_long_set(&ioc->refcount, 1);
 	atomic_set(&ioc->nr_tasks, 1);
 	spin_lock_init(&ioc->lock);
-	INIT_RADIX_TREE(&ioc->radix_root, GFP_ATOMIC | __GFP_HIGH);
-	INIT_HLIST_HEAD(&ioc->cic_list);
+	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
+	INIT_HLIST_HEAD(&ioc->icq_list);
 	INIT_WORK(&ioc->release_work, ioc_release_fn);
 
 	/* try to install, somebody might already have beaten us to it */
@@ -270,11 +268,11 @@ EXPORT_SYMBOL(get_task_io_context);
 
 void ioc_set_changed(struct io_context *ioc, int which)
 {
-	struct cfq_io_context *cic;
+	struct io_cq *icq;
 	struct hlist_node *n;
 
-	hlist_for_each_entry(cic, n, &ioc->cic_list, cic_list)
-		set_bit(which, &cic->changed);
+	hlist_for_each_entry(icq, n, &ioc->icq_list, ioc_node)
+		set_bit(which, &icq->changed);
 }
 
 /**
@@ -282,8 +280,8 @@ void ioc_set_changed(struct io_context *ioc, int which)
  * @ioc: io_context of interest
  * @ioprio: new ioprio
  *
- * @ioc's ioprio has changed to @ioprio.  Set %CIC_IOPRIO_CHANGED for all
- * cic's.  iosched is responsible for checking the bit and applying it on
+ * @ioc's ioprio has changed to @ioprio.  Set %ICQ_IOPRIO_CHANGED for all
+ * icq's.  iosched is responsible for checking the bit and applying it on
  * request issue path.
  */
 void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
@@ -292,7 +290,7 @@ void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
 
 	spin_lock_irqsave(&ioc->lock, flags);
 	ioc->ioprio = ioprio;
-	ioc_set_changed(ioc, CIC_IOPRIO_CHANGED);
+	ioc_set_changed(ioc, ICQ_IOPRIO_CHANGED);
 	spin_unlock_irqrestore(&ioc->lock, flags);
 }
 
@@ -300,7 +298,7 @@ void ioc_ioprio_changed(struct io_context *ioc, int ioprio)
  * ioc_cgroup_changed - notify cgroup change
  * @ioc: io_context of interest
  *
- * @ioc's cgroup has changed.  Set %CIC_CGROUP_CHANGED for all cic's.
+ * @ioc's cgroup has changed.  Set %ICQ_CGROUP_CHANGED for all icq's.
  * iosched is responsible for checking the bit and applying it on request
  * issue path.
  */
@@ -309,7 +307,7 @@ void ioc_cgroup_changed(struct io_context *ioc)
 	unsigned long flags;
 
 	spin_lock_irqsave(&ioc->lock, flags);
-	ioc_set_changed(ioc, CIC_CGROUP_CHANGED);
+	ioc_set_changed(ioc, ICQ_CGROUP_CHANGED);
 	spin_unlock_irqrestore(&ioc->lock, flags);
 }
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 5f7e4d161404..d2f16fcdec7f 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -54,13 +54,12 @@ static const int cfq_hist_divisor = 4;
 #define CFQQ_SECT_THR_NONROT	(sector_t)(2 * 32)
 #define CFQQ_SEEKY(cfqq)	(hweight32(cfqq->seek_history) > 32/8)
 
-#define RQ_CIC(rq)		\
-	((struct cfq_io_context *) (rq)->elevator_private[0])
+#define RQ_CIC(rq)		icq_to_cic((rq)->elevator_private[0])
 #define RQ_CFQQ(rq)		(struct cfq_queue *) ((rq)->elevator_private[1])
 #define RQ_CFQG(rq)		(struct cfq_group *) ((rq)->elevator_private[2])
 
 static struct kmem_cache *cfq_pool;
-static struct kmem_cache *cfq_ioc_pool;
+static struct kmem_cache *cfq_icq_pool;
 
 #define CFQ_PRIO_LISTS		IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
@@ -69,6 +68,14 @@ static struct kmem_cache *cfq_ioc_pool;
 #define sample_valid(samples)	((samples) > 80)
 #define rb_entry_cfqg(node)	rb_entry((node), struct cfq_group, rb_node)
 
+struct cfq_ttime {
+	unsigned long last_end_request;
+
+	unsigned long ttime_total;
+	unsigned long ttime_samples;
+	unsigned long ttime_mean;
+};
+
 /*
  * Most of our rbtree usage is for sorting with min extraction, so
  * if we cache the leftmost node we don't have to walk down the tree
@@ -210,6 +217,12 @@ struct cfq_group {
 	struct cfq_ttime ttime;
 };
 
+struct cfq_io_cq {
+	struct io_cq		icq;		/* must be the first member */
+	struct cfq_queue	*cfqq[2];
+	struct cfq_ttime	ttime;
+};
+
 /*
  * Per block device queue structure
  */
@@ -261,7 +274,7 @@ struct cfq_data {
 	struct work_struct unplug_work;
 
 	struct cfq_queue *active_queue;
-	struct cfq_io_context *active_cic;
+	struct cfq_io_cq *active_cic;
 
 	/*
 	 * async queue for each priority case
@@ -284,7 +297,7 @@ struct cfq_data {
 	unsigned int cfq_group_idle;
 	unsigned int cfq_latency;
 
-	struct list_head cic_list;
+	struct list_head icq_list;
 
 	/*
 	 * Fallback dummy cfqq for extreme OOM conditions
@@ -457,24 +470,28 @@ static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
 static void cfq_dispatch_insert(struct request_queue *, struct request *);
 static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
 				       struct io_context *, gfp_t);
-static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
-						struct io_context *);
+static struct cfq_io_cq *cfq_cic_lookup(struct cfq_data *, struct io_context *);
 
-static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
-					    bool is_sync)
+static inline struct cfq_io_cq *icq_to_cic(struct io_cq *icq)
+{
+	/* cic->icq is the first member, %NULL will convert to %NULL */
+	return container_of(icq, struct cfq_io_cq, icq);
+}
+
+static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_cq *cic, bool is_sync)
 {
 	return cic->cfqq[is_sync];
 }
 
-static inline void cic_set_cfqq(struct cfq_io_context *cic,
-				struct cfq_queue *cfqq, bool is_sync)
+static inline void cic_set_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq,
+				bool is_sync)
 {
 	cic->cfqq[is_sync] = cfqq;
 }
 
-static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
+static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic)
 {
-	return cic->q->elevator->elevator_data;
+	return cic->icq.q->elevator->elevator_data;
 }
 
 /*
@@ -1541,7 +1558,7 @@ static struct request *
 cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
 {
 	struct task_struct *tsk = current;
-	struct cfq_io_context *cic;
+	struct cfq_io_cq *cic;
 	struct cfq_queue *cfqq;
 
 	cic = cfq_cic_lookup(cfqd, tsk->io_context);
@@ -1655,7 +1672,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
 			   struct bio *bio)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_io_context *cic;
+	struct cfq_io_cq *cic;
 	struct cfq_queue *cfqq;
 
 	/*
@@ -1671,7 +1688,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
 	 * and %current are guaranteed to be equal.  Avoid lookup which
 	 * requires queue_lock by using @rq's cic.
 	 */
-	if (current->io_context == RQ_CIC(rq)->ioc) {
+	if (current->io_context == RQ_CIC(rq)->icq.ioc) {
 		cic = RQ_CIC(rq);
 	} else {
 		cic = cfq_cic_lookup(cfqd, current->io_context);
@@ -1761,7 +1778,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		cfqd->active_queue = NULL;
 
 	if (cfqd->active_cic) {
-		put_io_context(cfqd->active_cic->ioc, cfqd->queue);
+		put_io_context(cfqd->active_cic->icq.ioc, cfqd->queue);
 		cfqd->active_cic = NULL;
 	}
 }
@@ -1981,7 +1998,7 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 {
 	struct cfq_queue *cfqq = cfqd->active_queue;
-	struct cfq_io_context *cic;
+	struct cfq_io_cq *cic;
 	unsigned long sl, group_idle = 0;
 
 	/*
@@ -2016,7 +2033,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	 * task has exited, don't wait
 	 */
 	cic = cfqd->active_cic;
-	if (!cic || !atomic_read(&cic->ioc->nr_tasks))
+	if (!cic || !atomic_read(&cic->icq.ioc->nr_tasks))
 		return;
 
 	/*
@@ -2567,9 +2584,9 @@ static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	cfq_dispatch_insert(cfqd->queue, rq);
 
 	if (!cfqd->active_cic) {
-		struct cfq_io_context *cic = RQ_CIC(rq);
+		struct cfq_io_cq *cic = RQ_CIC(rq);
 
-		atomic_long_inc(&cic->ioc->refcount);
+		atomic_long_inc(&cic->icq.ioc->refcount);
 		cfqd->active_cic = cic;
 	}
 
@@ -2652,24 +2669,24 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	cfq_put_cfqg(cfqg);
 }
 
-static void cfq_cic_free_rcu(struct rcu_head *head)
+static void cfq_icq_free_rcu(struct rcu_head *head)
 {
-	kmem_cache_free(cfq_ioc_pool,
-			container_of(head, struct cfq_io_context, rcu_head));
+	kmem_cache_free(cfq_icq_pool,
+			icq_to_cic(container_of(head, struct io_cq, rcu_head)));
 }
 
-static void cfq_cic_free(struct cfq_io_context *cic)
+static void cfq_icq_free(struct io_cq *icq)
 {
-	call_rcu(&cic->rcu_head, cfq_cic_free_rcu);
+	call_rcu(&icq->rcu_head, cfq_icq_free_rcu);
 }
 
-static void cfq_release_cic(struct cfq_io_context *cic)
+static void cfq_release_icq(struct io_cq *icq)
 {
-	struct io_context *ioc = cic->ioc;
+	struct io_context *ioc = icq->ioc;
 
-	radix_tree_delete(&ioc->radix_root, cic->q->id);
-	hlist_del(&cic->cic_list);
-	cfq_cic_free(cic);
+	radix_tree_delete(&ioc->icq_tree, icq->q->id);
+	hlist_del(&icq->ioc_node);
+	cfq_icq_free(icq);
 }
 
 static void cfq_put_cooperator(struct cfq_queue *cfqq)
@@ -2705,20 +2722,21 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	cfq_put_queue(cfqq);
 }
 
-static void cfq_exit_cic(struct cfq_io_context *cic)
+static void cfq_exit_icq(struct io_cq *icq)
 {
+	struct cfq_io_cq *cic = icq_to_cic(icq);
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
-	struct io_context *ioc = cic->ioc;
+	struct io_context *ioc = icq->ioc;
 
-	list_del_init(&cic->queue_list);
+	list_del_init(&icq->q_node);
 
 	/*
-	 * Both setting lookup hint to and clearing it from @cic are done
-	 * under queue_lock.  If it's not pointing to @cic now, it never
+	 * Both setting lookup hint to and clearing it from @icq are done
+	 * under queue_lock.  If it's not pointing to @icq now, it never
 	 * will.  Hint assignment itself can race safely.
 	 */
-	if (rcu_dereference_raw(ioc->ioc_data) == cic)
-		rcu_assign_pointer(ioc->ioc_data, NULL);
+	if (rcu_dereference_raw(ioc->icq_hint) == icq)
+		rcu_assign_pointer(ioc->icq_hint, NULL);
 
 	if (cic->cfqq[BLK_RW_ASYNC]) {
 		cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
@@ -2731,19 +2749,18 @@ static void cfq_exit_cic(struct cfq_io_context *cic)
 	}
 }
 
-static struct cfq_io_context *
-cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
+static struct cfq_io_cq *cfq_alloc_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
-	struct cfq_io_context *cic;
+	struct cfq_io_cq *cic;
 
-	cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
+	cic = kmem_cache_alloc_node(cfq_icq_pool, gfp_mask | __GFP_ZERO,
 							cfqd->queue->node);
 	if (cic) {
 		cic->ttime.last_end_request = jiffies;
-		INIT_LIST_HEAD(&cic->queue_list);
-		INIT_HLIST_NODE(&cic->cic_list);
-		cic->exit = cfq_exit_cic;
-		cic->release = cfq_release_cic;
+		INIT_LIST_HEAD(&cic->icq.q_node);
+		INIT_HLIST_NODE(&cic->icq.ioc_node);
+		cic->icq.exit = cfq_exit_icq;
+		cic->icq.release = cfq_release_icq;
 	}
 
 	return cic;
@@ -2791,7 +2808,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 	cfq_clear_cfqq_prio_changed(cfqq);
 }
 
-static void changed_ioprio(struct cfq_io_context *cic)
+static void changed_ioprio(struct cfq_io_cq *cic)
 {
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
 	struct cfq_queue *cfqq;
@@ -2802,7 +2819,7 @@ static void changed_ioprio(struct cfq_io_context *cic)
 	cfqq = cic->cfqq[BLK_RW_ASYNC];
 	if (cfqq) {
 		struct cfq_queue *new_cfqq;
-		new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
+		new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->icq.ioc,
 						GFP_ATOMIC);
 		if (new_cfqq) {
 			cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
@@ -2836,7 +2853,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 }
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-static void changed_cgroup(struct cfq_io_context *cic)
+static void changed_cgroup(struct cfq_io_cq *cic)
 {
 	struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
@@ -2864,7 +2881,7 @@ cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
 		     struct io_context *ioc, gfp_t gfp_mask)
 {
 	struct cfq_queue *cfqq, *new_cfqq = NULL;
-	struct cfq_io_context *cic;
+	struct cfq_io_cq *cic;
 	struct cfq_group *cfqg;
 
 retry:
@@ -2956,56 +2973,57 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
 }
 
 /**
- * cfq_cic_lookup - lookup cfq_io_context
+ * cfq_cic_lookup - lookup cfq_io_cq
  * @cfqd: the associated cfq_data
  * @ioc: the associated io_context
  *
- * Look up cfq_io_context associated with @cfqd - @ioc pair.  Must be
- * called with queue_lock held.
+ * Look up cfq_io_cq associated with @cfqd - @ioc pair.  Must be called
+ * with queue_lock held.
  */
-static struct cfq_io_context *
+static struct cfq_io_cq *
 cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
 {
 	struct request_queue *q = cfqd->queue;
-	struct cfq_io_context *cic;
+	struct io_cq *icq;
 
 	lockdep_assert_held(cfqd->queue->queue_lock);
 	if (unlikely(!ioc))
 		return NULL;
 
 	/*
-	 * cic's are indexed from @ioc using radix tree and hint pointer,
+	 * icq's are indexed from @ioc using radix tree and hint pointer,
 	 * both of which are protected with RCU.  All removals are done
 	 * holding both q and ioc locks, and we're holding q lock - if we
-	 * find a cic which points to us, it's guaranteed to be valid.
+	 * find a icq which points to us, it's guaranteed to be valid.
 	 */
 	rcu_read_lock();
-	cic = rcu_dereference(ioc->ioc_data);
-	if (cic && cic->q == q)
+	icq = rcu_dereference(ioc->icq_hint);
+	if (icq && icq->q == q)
 		goto out;
 
-	cic = radix_tree_lookup(&ioc->radix_root, cfqd->queue->id);
-	if (cic && cic->q == q)
-		rcu_assign_pointer(ioc->ioc_data, cic);	/* allowed to race */
+	icq = radix_tree_lookup(&ioc->icq_tree, cfqd->queue->id);
+	if (icq && icq->q == q)
+		rcu_assign_pointer(ioc->icq_hint, icq);	/* allowed to race */
 	else
-		cic = NULL;
+		icq = NULL;
 out:
 	rcu_read_unlock();
-	return cic;
+	return icq_to_cic(icq);
 }
 
 /**
- * cfq_create_cic - create and link a cfq_io_context
+ * cfq_create_cic - create and link a cfq_io_cq
  * @cfqd: cfqd of interest
  * @gfp_mask: allocation mask
  *
- * Make sure cfq_io_context linking %current->io_context and @cfqd exists.
- * If ioc and/or cic doesn't exist, they will be created using @gfp_mask.
+ * Make sure cfq_io_cq linking %current->io_context and @cfqd exists.  If
+ * ioc and/or cic doesn't exist, they will be created using @gfp_mask.
  */
 static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
 	struct request_queue *q = cfqd->queue;
-	struct cfq_io_context *cic = NULL;
+	struct io_cq *icq = NULL;
+	struct cfq_io_cq *cic;
 	struct io_context *ioc;
 	int ret = -ENOMEM;
 
@@ -3016,26 +3034,27 @@ static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 	if (!ioc)
 		goto out;
 
-	cic = cfq_alloc_io_context(cfqd, gfp_mask);
+	cic = cfq_alloc_cic(cfqd, gfp_mask);
 	if (!cic)
 		goto out;
+	icq = &cic->icq;
 
 	ret = radix_tree_preload(gfp_mask);
 	if (ret)
 		goto out;
 
-	cic->ioc = ioc;
-	cic->q = cfqd->queue;
+	icq->ioc = ioc;
+	icq->q = cfqd->queue;
 
-	/* lock both q and ioc and try to link @cic */
+	/* lock both q and ioc and try to link @icq */
 	spin_lock_irq(q->queue_lock);
 	spin_lock(&ioc->lock);
 
-	ret = radix_tree_insert(&ioc->radix_root, q->id, cic);
+	ret = radix_tree_insert(&ioc->icq_tree, q->id, icq);
 	if (likely(!ret)) {
-		hlist_add_head(&cic->cic_list, &ioc->cic_list);
-		list_add(&cic->queue_list, &cfqd->cic_list);
-		cic = NULL;
+		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
+		list_add(&icq->q_node, &cfqd->icq_list);
+		icq = NULL;
 	} else if (ret == -EEXIST) {
 		/* someone else already did it */
 		ret = 0;
@@ -3047,29 +3066,28 @@ static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 	radix_tree_preload_end();
 out:
 	if (ret)
-		printk(KERN_ERR "cfq: cic link failed!\n");
-	if (cic)
-		cfq_cic_free(cic);
+		printk(KERN_ERR "cfq: icq link failed!\n");
+	if (icq)
+		cfq_icq_free(icq);
 	return ret;
 }
 
 /**
- * cfq_get_io_context - acquire cfq_io_context and bump refcnt on io_context
+ * cfq_get_cic - acquire cfq_io_cq and bump refcnt on io_context
  * @cfqd: cfqd to setup cic for
  * @gfp_mask: allocation mask
  *
- * Return cfq_io_context associating @cfqd and %current->io_context and
+ * Return cfq_io_cq associating @cfqd and %current->io_context and
  * bump refcnt on io_context.  If ioc or cic doesn't exist, they're created
  * using @gfp_mask.
  *
  * Must be called under queue_lock which may be released and re-acquired.
  * This function also may sleep depending on @gfp_mask.
  */
-static struct cfq_io_context *
-cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
+static struct cfq_io_cq *cfq_get_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
 	struct request_queue *q = cfqd->queue;
-	struct cfq_io_context *cic = NULL;
+	struct cfq_io_cq *cic = NULL;
 	struct io_context *ioc;
 	int err;
 
@@ -3095,11 +3113,11 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 	/* bump @ioc's refcnt and handle changed notifications */
 	get_io_context(ioc);
 
-	if (unlikely(cic->changed)) {
-		if (test_and_clear_bit(CIC_IOPRIO_CHANGED, &cic->changed))
+	if (unlikely(cic->icq.changed)) {
+		if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed))
 			changed_ioprio(cic);
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-		if (test_and_clear_bit(CIC_CGROUP_CHANGED, &cic->changed))
+		if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed))
 			changed_cgroup(cic);
 #endif
 	}
@@ -3120,7 +3138,7 @@ __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
 
 static void
 cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-	struct cfq_io_context *cic)
+			struct cfq_io_cq *cic)
 {
 	if (cfq_cfqq_sync(cfqq)) {
 		__cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
@@ -3158,7 +3176,7 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
  */
 static void
 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-		       struct cfq_io_context *cic)
+		       struct cfq_io_cq *cic)
 {
 	int old_idle, enable_idle;
 
@@ -3175,8 +3193,9 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 
 	if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE))
 		enable_idle = 0;
-	else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
-	    (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
+	else if (!atomic_read(&cic->icq.ioc->nr_tasks) ||
+		 !cfqd->cfq_slice_idle ||
+		 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
 		enable_idle = 0;
 	else if (sample_valid(cic->ttime.ttime_samples)) {
 		if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
@@ -3308,7 +3327,7 @@ static void
 cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		struct request *rq)
 {
-	struct cfq_io_context *cic = RQ_CIC(rq);
+	struct cfq_io_cq *cic = RQ_CIC(rq);
 
 	cfqd->rq_queued++;
 	if (rq->cmd_flags & REQ_PRIO)
@@ -3361,7 +3380,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
 	struct cfq_queue *cfqq = RQ_CFQQ(rq);
 
 	cfq_log_cfqq(cfqd, cfqq, "insert_request");
-	cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
+	cfq_init_prio_data(cfqq, RQ_CIC(rq)->icq.ioc);
 
 	rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
 	list_add_tail(&rq->queuelist, &cfqq->fifo);
@@ -3411,7 +3430,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
 
 static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-	struct cfq_io_context *cic = cfqd->active_cic;
+	struct cfq_io_cq *cic = cfqd->active_cic;
 
 	/* If the queue already has requests, don't wait */
 	if (!RB_EMPTY_ROOT(&cfqq->sort_list))
@@ -3548,7 +3567,7 @@ static int cfq_may_queue(struct request_queue *q, int rw)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct task_struct *tsk = current;
-	struct cfq_io_context *cic;
+	struct cfq_io_cq *cic;
 	struct cfq_queue *cfqq;
 
 	/*
@@ -3563,7 +3582,7 @@ static int cfq_may_queue(struct request_queue *q, int rw)
 
 	cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
 	if (cfqq) {
-		cfq_init_prio_data(cfqq, cic->ioc);
+		cfq_init_prio_data(cfqq, cic->icq.ioc);
 
 		return __cfq_may_queue(cfqq);
 	}
@@ -3584,7 +3603,7 @@ static void cfq_put_request(struct request *rq)
 		BUG_ON(!cfqq->allocated[rw]);
 		cfqq->allocated[rw]--;
 
-		put_io_context(RQ_CIC(rq)->ioc, cfqq->cfqd->queue);
+		put_io_context(RQ_CIC(rq)->icq.ioc, cfqq->cfqd->queue);
 
 		rq->elevator_private[0] = NULL;
 		rq->elevator_private[1] = NULL;
@@ -3598,7 +3617,7 @@ static void cfq_put_request(struct request *rq)
 }
 
 static struct cfq_queue *
-cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic,
+cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_cq *cic,
 		struct cfq_queue *cfqq)
 {
 	cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq);
@@ -3613,7 +3632,7 @@ cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic,
  * was the last process referring to said cfqq.
  */
 static struct cfq_queue *
-split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq)
+split_cfqq(struct cfq_io_cq *cic, struct cfq_queue *cfqq)
 {
 	if (cfqq_process_refs(cfqq) == 1) {
 		cfqq->pid = current->pid;
@@ -3636,7 +3655,7 @@ static int
 cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_io_context *cic;
+	struct cfq_io_cq *cic;
 	const int rw = rq_data_dir(rq);
 	const bool is_sync = rq_is_sync(rq);
 	struct cfq_queue *cfqq;
@@ -3644,14 +3663,14 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
 	spin_lock_irq(q->queue_lock);
-	cic = cfq_get_io_context(cfqd, gfp_mask);
+	cic = cfq_get_cic(cfqd, gfp_mask);
 	if (!cic)
 		goto queue_fail;
 
 new_queue:
 	cfqq = cic_to_cfqq(cic, is_sync);
 	if (!cfqq || cfqq == &cfqd->oom_cfqq) {
-		cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
+		cfqq = cfq_get_queue(cfqd, is_sync, cic->icq.ioc, gfp_mask);
 		cic_set_cfqq(cic, cfqq, is_sync);
 	} else {
 		/*
@@ -3677,7 +3696,7 @@ new_queue:
 	cfqq->allocated[rw]++;
 
 	cfqq->ref++;
-	rq->elevator_private[0] = cic;
+	rq->elevator_private[0] = &cic->icq;
 	rq->elevator_private[1] = cfqq;
 	rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg);
 	spin_unlock_irq(q->queue_lock);
@@ -3791,15 +3810,14 @@ static void cfq_exit_queue(struct elevator_queue *e)
 	if (cfqd->active_queue)
 		__cfq_slice_expired(cfqd, cfqd->active_queue, 0);
 
-	while (!list_empty(&cfqd->cic_list)) {
-		struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
-							struct cfq_io_context,
-							queue_list);
-		struct io_context *ioc = cic->ioc;
+	while (!list_empty(&cfqd->icq_list)) {
+		struct io_cq *icq = list_entry(cfqd->icq_list.next,
+					       struct io_cq, q_node);
+		struct io_context *ioc = icq->ioc;
 
 		spin_lock(&ioc->lock);
-		cfq_exit_cic(cic);
-		cfq_release_cic(cic);
+		cfq_exit_icq(icq);
+		cfq_release_icq(icq);
 		spin_unlock(&ioc->lock);
 	}
 
@@ -3904,7 +3922,7 @@ static void *cfq_init_queue(struct request_queue *q)
 	cfqd->oom_cfqq.ref++;
 	cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
 
-	INIT_LIST_HEAD(&cfqd->cic_list);
+	INIT_LIST_HEAD(&cfqd->icq_list);
 
 	cfqd->queue = q;
 
@@ -3942,8 +3960,8 @@ static void cfq_slab_kill(void)
 	 */
 	if (cfq_pool)
 		kmem_cache_destroy(cfq_pool);
-	if (cfq_ioc_pool)
-		kmem_cache_destroy(cfq_ioc_pool);
+	if (cfq_icq_pool)
+		kmem_cache_destroy(cfq_icq_pool);
 }
 
 static int __init cfq_slab_setup(void)
@@ -3952,8 +3970,8 @@ static int __init cfq_slab_setup(void)
 	if (!cfq_pool)
 		goto fail;
 
-	cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0);
-	if (!cfq_ioc_pool)
+	cfq_icq_pool = KMEM_CACHE(cfq_io_cq, 0);
+	if (!cfq_icq_pool)
 		goto fail;
 
 	return 0;
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index b2b75a54f252..d15ca6591f96 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -5,38 +5,23 @@
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
 
-struct cfq_queue;
-struct cfq_ttime {
-	unsigned long last_end_request;
-
-	unsigned long ttime_total;
-	unsigned long ttime_samples;
-	unsigned long ttime_mean;
-};
-
 enum {
-	CIC_IOPRIO_CHANGED,
-	CIC_CGROUP_CHANGED,
+	ICQ_IOPRIO_CHANGED,
+	ICQ_CGROUP_CHANGED,
 };
 
-struct cfq_io_context {
-	struct request_queue *q;
-
-	struct cfq_queue *cfqq[2];
-
-	struct io_context *ioc;
-
-	struct cfq_ttime ttime;
-
-	struct list_head queue_list;
-	struct hlist_node cic_list;
+struct io_cq {
+	struct request_queue	*q;
+	struct io_context	*ioc;
 
-	unsigned long changed;
+	struct list_head	q_node;
+	struct hlist_node	ioc_node;
 
-	void (*exit)(struct cfq_io_context *);
-	void (*release)(struct cfq_io_context *);
+	unsigned long		changed;
+	struct rcu_head		rcu_head;
 
-	struct rcu_head rcu_head;
+	void (*exit)(struct io_cq *);
+	void (*release)(struct io_cq *);
 };
 
 /*
@@ -58,9 +43,9 @@ struct io_context {
 	int nr_batch_requests;     /* Number of requests left in the batch */
 	unsigned long last_waited; /* Time last woken after wait for request */
 
-	struct radix_tree_root radix_root;
-	struct hlist_head cic_list;
-	void __rcu *ioc_data;
+	struct radix_tree_root	icq_tree;
+	struct io_cq __rcu	*icq_hint;
+	struct hlist_head	icq_list;
 
 	struct work_struct release_work;
 };
-- 
cgit v1.2.3


From a612fddf0d8090f2877305c9168b6c1a34fb5d90 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:41 +0100
Subject: block, cfq: move cfqd->icq_list to request_queue and add
 request->elv.icq

Most of icq management is about to be moved out of cfq into blk-ioc.
This patch prepares for it.

* Move cfqd->icq_list to request_queue->icq_list

* Make request explicitly point to icq instead of through elevator
  private data.  ->elevator_private[3] is replaced with sub struct elv
  which contains icq pointer and priv[2].  cfq is updated accordingly.

* Meaningless clearing of ->elevator_private[0] removed from
  elv_set_request().  At that point in code, the field was guaranteed
  to be %NULL anyway.

This patch doesn't introduce any functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       |  1 +
 block/cfq-iosched.c    | 28 +++++++++++-----------------
 block/elevator.c       |  2 --
 include/linux/blkdev.h | 10 ++++++++--
 4 files changed, 20 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 6804fdf27eff..3c26c7f48703 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -497,6 +497,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 		    laptop_mode_timer_fn, (unsigned long) q);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
 	INIT_LIST_HEAD(&q->timeout_list);
+	INIT_LIST_HEAD(&q->icq_list);
 	INIT_LIST_HEAD(&q->flush_queue[0]);
 	INIT_LIST_HEAD(&q->flush_queue[1]);
 	INIT_LIST_HEAD(&q->flush_data_in_flight);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d2f16fcdec7f..9bc5ecc1b336 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -54,9 +54,9 @@ static const int cfq_hist_divisor = 4;
 #define CFQQ_SECT_THR_NONROT	(sector_t)(2 * 32)
 #define CFQQ_SEEKY(cfqq)	(hweight32(cfqq->seek_history) > 32/8)
 
-#define RQ_CIC(rq)		icq_to_cic((rq)->elevator_private[0])
-#define RQ_CFQQ(rq)		(struct cfq_queue *) ((rq)->elevator_private[1])
-#define RQ_CFQG(rq)		(struct cfq_group *) ((rq)->elevator_private[2])
+#define RQ_CIC(rq)		icq_to_cic((rq)->elv.icq)
+#define RQ_CFQQ(rq)		(struct cfq_queue *) ((rq)->elv.priv[0])
+#define RQ_CFQG(rq)		(struct cfq_group *) ((rq)->elv.priv[1])
 
 static struct kmem_cache *cfq_pool;
 static struct kmem_cache *cfq_icq_pool;
@@ -297,8 +297,6 @@ struct cfq_data {
 	unsigned int cfq_group_idle;
 	unsigned int cfq_latency;
 
-	struct list_head icq_list;
-
 	/*
 	 * Fallback dummy cfqq for extreme OOM conditions
 	 */
@@ -3053,7 +3051,7 @@ static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 	ret = radix_tree_insert(&ioc->icq_tree, q->id, icq);
 	if (likely(!ret)) {
 		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
-		list_add(&icq->q_node, &cfqd->icq_list);
+		list_add(&icq->q_node, &q->icq_list);
 		icq = NULL;
 	} else if (ret == -EEXIST) {
 		/* someone else already did it */
@@ -3605,12 +3603,10 @@ static void cfq_put_request(struct request *rq)
 
 		put_io_context(RQ_CIC(rq)->icq.ioc, cfqq->cfqd->queue);
 
-		rq->elevator_private[0] = NULL;
-		rq->elevator_private[1] = NULL;
-
 		/* Put down rq reference on cfqg */
 		cfq_put_cfqg(RQ_CFQG(rq));
-		rq->elevator_private[2] = NULL;
+		rq->elv.priv[0] = NULL;
+		rq->elv.priv[1] = NULL;
 
 		cfq_put_queue(cfqq);
 	}
@@ -3696,9 +3692,9 @@ new_queue:
 	cfqq->allocated[rw]++;
 
 	cfqq->ref++;
-	rq->elevator_private[0] = &cic->icq;
-	rq->elevator_private[1] = cfqq;
-	rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg);
+	rq->elv.icq = &cic->icq;
+	rq->elv.priv[0] = cfqq;
+	rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg);
 	spin_unlock_irq(q->queue_lock);
 	return 0;
 
@@ -3810,8 +3806,8 @@ static void cfq_exit_queue(struct elevator_queue *e)
 	if (cfqd->active_queue)
 		__cfq_slice_expired(cfqd, cfqd->active_queue, 0);
 
-	while (!list_empty(&cfqd->icq_list)) {
-		struct io_cq *icq = list_entry(cfqd->icq_list.next,
+	while (!list_empty(&q->icq_list)) {
+		struct io_cq *icq = list_entry(q->icq_list.next,
 					       struct io_cq, q_node);
 		struct io_context *ioc = icq->ioc;
 
@@ -3922,8 +3918,6 @@ static void *cfq_init_queue(struct request_queue *q)
 	cfqd->oom_cfqq.ref++;
 	cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
 
-	INIT_LIST_HEAD(&cfqd->icq_list);
-
 	cfqd->queue = q;
 
 	init_timer(&cfqd->idle_slice_timer);
diff --git a/block/elevator.c b/block/elevator.c
index 31ffe76aed3d..c5c6214829cb 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -745,8 +745,6 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 
 	if (e->type->ops.elevator_set_req_fn)
 		return e->type->ops.elevator_set_req_fn(q, rq, gfp_mask);
-
-	rq->elevator_private[0] = NULL;
 	return 0;
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 65c2f8c70089..8bca04873f53 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -111,10 +111,14 @@ struct request {
 	 * Three pointers are available for the IO schedulers, if they need
 	 * more they have to dynamically allocate it.  Flush requests are
 	 * never put on the IO scheduler. So let the flush fields share
-	 * space with the three elevator_private pointers.
+	 * space with the elevator data.
 	 */
 	union {
-		void *elevator_private[3];
+		struct {
+			struct io_cq		*icq;
+			void			*priv[2];
+		} elv;
+
 		struct {
 			unsigned int		seq;
 			struct list_head	list;
@@ -357,6 +361,8 @@ struct request_queue {
 	struct timer_list	timeout;
 	struct list_head	timeout_list;
 
+	struct list_head	icq_list;
+
 	struct queue_limits	limits;
 
 	/*
-- 
cgit v1.2.3


From 3d3c2379feb177a5fd55bb0ed76776dc9d4f3243 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:42 +0100
Subject: block, cfq: move icq cache management to block core

Let elevators set ->icq_size and ->icq_align in elevator_type, and have
elv_register() and elv_unregister() respectively create and destroy the
kmem_cache for icq.

* elv_register() now can return failure.  All callers updated.

* icq caches are automatically named "ELVNAME_io_cq".

* cfq_slab_setup/kill() are collapsed into cfq_init/exit().

* While at it, minor indentation change for iosched_cfq.elevator_name
  for consistency.

This will help moving icq management to block core.  This doesn't
introduce any functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c      | 48 +++++++++++++++---------------------------------
 block/deadline-iosched.c |  4 +---
 block/elevator.c         | 37 +++++++++++++++++++++++++++++++++++--
 block/noop-iosched.c     |  4 +---
 include/linux/elevator.h | 11 ++++++++++-
 5 files changed, 62 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 048fa699adf9..06e59abcb57f 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3914,34 +3914,6 @@ static void *cfq_init_queue(struct request_queue *q)
 	return cfqd;
 }
 
-static void cfq_slab_kill(void)
-{
-	/*
-	 * Caller already ensured that pending RCU callbacks are completed,
-	 * so we should have no busy allocations at this point.
-	 */
-	if (cfq_pool)
-		kmem_cache_destroy(cfq_pool);
-	if (cfq_icq_pool)
-		kmem_cache_destroy(cfq_icq_pool);
-}
-
-static int __init cfq_slab_setup(void)
-{
-	cfq_pool = KMEM_CACHE(cfq_queue, 0);
-	if (!cfq_pool)
-		goto fail;
-
-	cfq_icq_pool = KMEM_CACHE(cfq_io_cq, 0);
-	if (!cfq_icq_pool)
-		goto fail;
-
-	return 0;
-fail:
-	cfq_slab_kill();
-	return -ENOMEM;
-}
-
 /*
  * sysfs parts below -->
  */
@@ -4053,8 +4025,10 @@ static struct elevator_type iosched_cfq = {
 		.elevator_init_fn =		cfq_init_queue,
 		.elevator_exit_fn =		cfq_exit_queue,
 	},
+	.icq_size	=	sizeof(struct cfq_io_cq),
+	.icq_align	=	__alignof__(struct cfq_io_cq),
 	.elevator_attrs =	cfq_attrs,
-	.elevator_name =	"cfq",
+	.elevator_name	=	"cfq",
 	.elevator_owner =	THIS_MODULE,
 };
 
@@ -4072,6 +4046,8 @@ static struct blkio_policy_type blkio_policy_cfq;
 
 static int __init cfq_init(void)
 {
+	int ret;
+
 	/*
 	 * could be 0 on HZ < 1000 setups
 	 */
@@ -4086,10 +4062,17 @@ static int __init cfq_init(void)
 #else
 		cfq_group_idle = 0;
 #endif
-	if (cfq_slab_setup())
+	cfq_pool = KMEM_CACHE(cfq_queue, 0);
+	if (!cfq_pool)
 		return -ENOMEM;
 
-	elv_register(&iosched_cfq);
+	ret = elv_register(&iosched_cfq);
+	if (ret) {
+		kmem_cache_destroy(cfq_pool);
+		return ret;
+	}
+	cfq_icq_pool = iosched_cfq.icq_cache;
+
 	blkio_policy_register(&blkio_policy_cfq);
 
 	return 0;
@@ -4099,8 +4082,7 @@ static void __exit cfq_exit(void)
 {
 	blkio_policy_unregister(&blkio_policy_cfq);
 	elv_unregister(&iosched_cfq);
-	rcu_barrier();	/* make sure all cic RCU frees are complete */
-	cfq_slab_kill();
+	kmem_cache_destroy(cfq_pool);
 }
 
 module_init(cfq_init);
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index c644137d9cd6..7bf12d793fcd 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -448,9 +448,7 @@ static struct elevator_type iosched_deadline = {
 
 static int __init deadline_init(void)
 {
-	elv_register(&iosched_deadline);
-
-	return 0;
+	return elv_register(&iosched_deadline);
 }
 
 static void __exit deadline_exit(void)
diff --git a/block/elevator.c b/block/elevator.c
index c5c6214829cb..cca049fb45c8 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -886,15 +886,36 @@ void elv_unregister_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(elv_unregister_queue);
 
-void elv_register(struct elevator_type *e)
+int elv_register(struct elevator_type *e)
 {
 	char *def = "";
 
+	/* create icq_cache if requested */
+	if (e->icq_size) {
+		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
+		    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
+			return -EINVAL;
+
+		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
+			 "%s_io_cq", e->elevator_name);
+		e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
+						 e->icq_align, 0, NULL);
+		if (!e->icq_cache)
+			return -ENOMEM;
+	}
+
+	/* register, don't allow duplicate names */
 	spin_lock(&elv_list_lock);
-	BUG_ON(elevator_find(e->elevator_name));
+	if (elevator_find(e->elevator_name)) {
+		spin_unlock(&elv_list_lock);
+		if (e->icq_cache)
+			kmem_cache_destroy(e->icq_cache);
+		return -EBUSY;
+	}
 	list_add_tail(&e->list, &elv_list);
 	spin_unlock(&elv_list_lock);
 
+	/* print pretty message */
 	if (!strcmp(e->elevator_name, chosen_elevator) ||
 			(!*chosen_elevator &&
 			 !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))
@@ -902,14 +923,26 @@ void elv_register(struct elevator_type *e)
 
 	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
 								def);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(elv_register);
 
 void elv_unregister(struct elevator_type *e)
 {
+	/* unregister */
 	spin_lock(&elv_list_lock);
 	list_del_init(&e->list);
 	spin_unlock(&elv_list_lock);
+
+	/*
+	 * Destroy icq_cache if it exists.  icq's are RCU managed.  Make
+	 * sure all RCU operations are complete before proceeding.
+	 */
+	if (e->icq_cache) {
+		rcu_barrier();
+		kmem_cache_destroy(e->icq_cache);
+		e->icq_cache = NULL;
+	}
 }
 EXPORT_SYMBOL_GPL(elv_unregister);
 
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index 06389e9ef96d..413a0b1d788c 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -94,9 +94,7 @@ static struct elevator_type elevator_noop = {
 
 static int __init noop_init(void)
 {
-	elv_register(&elevator_noop);
-
-	return 0;
+	return elv_register(&elevator_noop);
 }
 
 static void __exit noop_exit(void)
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 04958ef53e62..d3d3e28cbfd4 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -78,10 +78,19 @@ struct elv_fs_entry {
  */
 struct elevator_type
 {
+	/* managed by elevator core: created in elv_register(), destroyed in elv_unregister() */
+	struct kmem_cache *icq_cache;
+
+	/* fields provided by elevator implementation */
 	struct elevator_ops ops;
+	size_t icq_size;	/* object size for icq_cache; 0 means no cache is created */
+	size_t icq_align;	/* object alignment for icq_cache */
 	struct elv_fs_entry *elevator_attrs;
 	char elevator_name[ELV_NAME_MAX];
 	struct module *elevator_owner;
+
+	/* managed by elevator core; sized for (ELV_NAME_MAX - 1) name chars + 6 suffix chars + NUL */
+	char icq_cache_name[ELV_NAME_MAX + 6];	/* elvname + "_io_cq" */
 	struct list_head list;
 };
 
@@ -127,7 +136,7 @@ extern void elv_drain_elevator(struct request_queue *);
 /*
  * io scheduler registration
  */
-extern void elv_register(struct elevator_type *);
+extern int elv_register(struct elevator_type *);
 extern void elv_unregister(struct elevator_type *);
 
 /*
-- 
cgit v1.2.3


From 7e5a8794492e43e9eebb68a98a23be055888ccd0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:42 +0100
Subject: block, cfq: move io_cq exit/release to blk-ioc.c

With kmem_cache managed by blk-ioc, io_cq exit/release can be moved to
blk-ioc too.  The odd ->io_cq->exit/release() callbacks are replaced
with elevator_ops->elevator_exit_icq_fn(), with unlinking from both ioc
and q, and freeing, automatically handled by blk-ioc.  The elevator
operation only needs to perform the exit operation specific to the
elevator - in cfq's case, exiting the cfqq's.

Also, clearing of io_cq's on q detach is moved to block core and
automatically performed on elevator switch and q release.

Because the q an io_cq points to might be freed before the RCU callback
for the io_cq runs, blk-ioc code should remember which cache the io_cq
needs to be freed to when the io_cq is released.  New field
io_cq->__rcu_icq_cache is added for this purpose.  As both the new
field and __rcu_head are used only after the io_cq is released, and the
q_node/ioc_node fields aren't, they are put into unions.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-ioc.c           | 76 ++++++++++++++++++++++++++++++++++++++++++-----
 block/blk-sysfs.c         |  6 +++-
 block/blk.h               |  1 +
 block/cfq-iosched.c       | 47 ++---------------------------
 block/elevator.c          |  3 +-
 include/linux/elevator.h  |  5 ++++
 include/linux/iocontext.h | 20 +++++++++----
 7 files changed, 97 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 87ecc98b8ade..0910a5584d38 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -44,6 +44,51 @@ EXPORT_SYMBOL(get_io_context);
 #define ioc_release_depth_dec(q)	do { } while (0)
 #endif
 
+static void icq_free_icq_rcu(struct rcu_head *head)
+{
+	struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);
+
+	kmem_cache_free(icq->__rcu_icq_cache, icq);
+}
+
+/*
+ * Exit and free an icq.  Called with both ioc and q locked.
+ */
+static void ioc_exit_icq(struct io_cq *icq)
+{
+	struct io_context *ioc = icq->ioc;
+	struct request_queue *q = icq->q;
+	struct elevator_type *et = q->elevator->type;
+
+	lockdep_assert_held(&ioc->lock);
+	lockdep_assert_held(q->queue_lock);
+
+	radix_tree_delete(&ioc->icq_tree, icq->q->id);
+	hlist_del_init(&icq->ioc_node);
+	list_del_init(&icq->q_node);
+
+	/*
+	 * Both setting lookup hint to and clearing it from @icq are done
+	 * under queue_lock.  If it's not pointing to @icq now, it never
+	 * will.  Hint assignment itself can race safely.
+	 */
+	if (rcu_dereference_raw(ioc->icq_hint) == icq)
+		rcu_assign_pointer(ioc->icq_hint, NULL);
+
+	if (et->ops.elevator_exit_icq_fn) {
+		ioc_release_depth_inc(q);
+		et->ops.elevator_exit_icq_fn(icq);
+		ioc_release_depth_dec(q);
+	}
+
+	/*
+	 * @icq->q might have gone away by the time RCU callback runs
+	 * making it impossible to determine icq_cache.  Record it in @icq.
+	 */
+	icq->__rcu_icq_cache = et->icq_cache;
+	call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
+}
+
 /*
  * Slow path for ioc release in put_io_context().  Performs double-lock
  * dancing to unlink all icq's and then frees ioc.
@@ -87,10 +132,7 @@ static void ioc_release_fn(struct work_struct *work)
 			spin_lock(&ioc->lock);
 			continue;
 		}
-		ioc_release_depth_inc(this_q);
-		icq->exit(icq);
-		icq->release(icq);
-		ioc_release_depth_dec(this_q);
+		ioc_exit_icq(icq);
 	}
 
 	if (last_q) {
@@ -167,10 +209,7 @@ void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
 			last_q = this_q;
 			continue;
 		}
-		ioc_release_depth_inc(this_q);
-		icq->exit(icq);
-		icq->release(icq);
-		ioc_release_depth_dec(this_q);
+		ioc_exit_icq(icq);
 	}
 
 	if (last_q && last_q != locked_q)
@@ -203,6 +242,27 @@ void exit_io_context(struct task_struct *task)
 	put_io_context(ioc, NULL);
 }
 
+/**
+ * ioc_clear_queue - break any ioc association with the specified queue
+ * @q: request_queue being cleared
+ *
+ * Walk @q->icq_list and exit all io_cq's.  Must be called with @q locked.
+ */
+void ioc_clear_queue(struct request_queue *q)
+{
+	lockdep_assert_held(q->queue_lock);
+
+	while (!list_empty(&q->icq_list)) {
+		struct io_cq *icq = list_entry(q->icq_list.next,
+					       struct io_cq, q_node);
+		struct io_context *ioc = icq->ioc;
+
+		spin_lock(&ioc->lock);
+		ioc_exit_icq(icq);
+		spin_unlock(&ioc->lock);
+	}
+}
+
 void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
 				int node)
 {
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 5b4b4ab5e785..cf150011d808 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -479,8 +479,12 @@ static void blk_release_queue(struct kobject *kobj)
 
 	blk_sync_queue(q);
 
-	if (q->elevator)
+	if (q->elevator) {
+		spin_lock_irq(q->queue_lock);
+		ioc_clear_queue(q);
+		spin_unlock_irq(q->queue_lock);
 		elevator_exit(q->elevator);
+	}
 
 	blk_throtl_exit(q);
 
diff --git a/block/blk.h b/block/blk.h
index 3c510a4b5054..ed4d9bf2ab16 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -200,6 +200,7 @@ static inline int blk_do_io_stat(struct request *rq)
  */
 void get_io_context(struct io_context *ioc);
 struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q);
+void ioc_clear_queue(struct request_queue *q);
 
 void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_mask,
 				int node);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 06e59abcb57f..f6d315551496 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2674,26 +2674,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 	cfq_put_cfqg(cfqg);
 }
 
-static void cfq_icq_free_rcu(struct rcu_head *head)
-{
-	kmem_cache_free(cfq_icq_pool,
-			icq_to_cic(container_of(head, struct io_cq, rcu_head)));
-}
-
-static void cfq_icq_free(struct io_cq *icq)
-{
-	call_rcu(&icq->rcu_head, cfq_icq_free_rcu);
-}
-
-static void cfq_release_icq(struct io_cq *icq)
-{
-	struct io_context *ioc = icq->ioc;
-
-	radix_tree_delete(&ioc->icq_tree, icq->q->id);
-	hlist_del(&icq->ioc_node);
-	cfq_icq_free(icq);
-}
-
 static void cfq_put_cooperator(struct cfq_queue *cfqq)
 {
 	struct cfq_queue *__cfqq, *next;
@@ -2731,17 +2711,6 @@ static void cfq_exit_icq(struct io_cq *icq)
 {
 	struct cfq_io_cq *cic = icq_to_cic(icq);
 	struct cfq_data *cfqd = cic_to_cfqd(cic);
-	struct io_context *ioc = icq->ioc;
-
-	list_del_init(&icq->q_node);
-
-	/*
-	 * Both setting lookup hint to and clearing it from @icq are done
-	 * under queue_lock.  If it's not pointing to @icq now, it never
-	 * will.  Hint assignment itself can race safely.
-	 */
-	if (rcu_dereference_raw(ioc->icq_hint) == icq)
-		rcu_assign_pointer(ioc->icq_hint, NULL);
 
 	if (cic->cfqq[BLK_RW_ASYNC]) {
 		cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
@@ -2764,8 +2733,6 @@ static struct cfq_io_cq *cfq_alloc_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 		cic->ttime.last_end_request = jiffies;
 		INIT_LIST_HEAD(&cic->icq.q_node);
 		INIT_HLIST_NODE(&cic->icq.ioc_node);
-		cic->icq.exit = cfq_exit_icq;
-		cic->icq.release = cfq_release_icq;
 	}
 
 	return cic;
@@ -3034,7 +3001,7 @@ out:
 	if (ret)
 		printk(KERN_ERR "cfq: icq link failed!\n");
 	if (icq)
-		cfq_icq_free(icq);
+		kmem_cache_free(cfq_icq_pool, icq);
 	return ret;
 }
 
@@ -3774,17 +3741,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
 	if (cfqd->active_queue)
 		__cfq_slice_expired(cfqd, cfqd->active_queue, 0);
 
-	while (!list_empty(&q->icq_list)) {
-		struct io_cq *icq = list_entry(q->icq_list.next,
-					       struct io_cq, q_node);
-		struct io_context *ioc = icq->ioc;
-
-		spin_lock(&ioc->lock);
-		cfq_exit_icq(icq);
-		cfq_release_icq(icq);
-		spin_unlock(&ioc->lock);
-	}
-
 	cfq_put_async_queues(cfqd);
 	cfq_release_cfq_groups(cfqd);
 
@@ -4019,6 +3975,7 @@ static struct elevator_type iosched_cfq = {
 		.elevator_completed_req_fn =	cfq_completed_request,
 		.elevator_former_req_fn =	elv_rb_former_request,
 		.elevator_latter_req_fn =	elv_rb_latter_request,
+		.elevator_exit_icq_fn =		cfq_exit_icq,
 		.elevator_set_req_fn =		cfq_set_request,
 		.elevator_put_req_fn =		cfq_put_request,
 		.elevator_may_queue_fn =	cfq_may_queue,
diff --git a/block/elevator.c b/block/elevator.c
index cca049fb45c8..91e18f8af9be 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -979,8 +979,9 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 			goto fail_register;
 	}
 
-	/* done, replace the old one with new one and turn off BYPASS */
+	/* done, clear io_cq's, switch elevators and turn off BYPASS */
 	spin_lock_irq(q->queue_lock);
+	ioc_clear_queue(q);
 	old_elevator = q->elevator;
 	q->elevator = e;
 	spin_unlock_irq(q->queue_lock);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index d3d3e28cbfd4..06e4dd568717 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -5,6 +5,8 @@
 
 #ifdef CONFIG_BLOCK
 
+struct io_cq;
+
 typedef int (elevator_merge_fn) (struct request_queue *, struct request **,
 				 struct bio *);
 
@@ -24,6 +26,7 @@ typedef struct request *(elevator_request_list_fn) (struct request_queue *, stru
 typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
 typedef int (elevator_may_queue_fn) (struct request_queue *, int);
 
+typedef void (elevator_exit_icq_fn) (struct io_cq *);
 typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
 typedef void (elevator_put_req_fn) (struct request *);
 typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *);
@@ -56,6 +59,8 @@ struct elevator_ops
 	elevator_request_list_fn *elevator_former_req_fn;
 	elevator_request_list_fn *elevator_latter_req_fn;
 
+	elevator_exit_icq_fn *elevator_exit_icq_fn;
+
 	elevator_set_req_fn *elevator_set_req_fn;
 	elevator_put_req_fn *elevator_put_req_fn;
 
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index d15ca6591f96..ac390a34c0e7 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -14,14 +14,22 @@ struct io_cq {
 	struct request_queue	*q;
 	struct io_context	*ioc;
 
-	struct list_head	q_node;
-	struct hlist_node	ioc_node;
+	/*
+	 * q_node and ioc_node link io_cq through icq_list of q and ioc
+	 * respectively.  Both fields are unused once ioc_exit_icq() is
+	 * called and shared with __rcu_icq_cache and __rcu_head which are
+	 * used for RCU free of io_cq.
+	 */
+	union {
+		struct list_head	q_node;
+		struct kmem_cache	*__rcu_icq_cache;
+	};
+	union {
+		struct hlist_node	ioc_node;
+		struct rcu_head		__rcu_head;
+	};
 
 	unsigned long		changed;
-	struct rcu_head		rcu_head;
-
-	void (*exit)(struct io_cq *);
-	void (*release)(struct io_cq *);
 };
 
 /*
-- 
cgit v1.2.3


From 9b84cacd013996f244d85b3d873287c2a8f88658 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:42 +0100
Subject: block, cfq: restructure io_cq creation path for io_context interface
 cleanup

Add elevator_ops->elevator_init_icq_fn() and restructure
cfq_create_cic() and rename it to ioc_create_icq().

The new function expects its caller to pass in io_context, uses
elevator_type->icq_cache, handles generic init, calls the new elevator
operation for elevator specific initialization, and returns pointer to
created or looked up icq.  This leaves cfq_icq_pool variable without
any user.  Removed.

This prepares for io_context interface cleanup and doesn't introduce
any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/cfq-iosched.c      | 94 +++++++++++++++++++++---------------------------
 include/linux/elevator.h |  2 ++
 2 files changed, 43 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index f6d315551496..11f49d036845 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -59,7 +59,6 @@ static const int cfq_hist_divisor = 4;
 #define RQ_CFQG(rq)		(struct cfq_group *) ((rq)->elv.priv[1])
 
 static struct kmem_cache *cfq_pool;
-static struct kmem_cache *cfq_icq_pool;
 
 #define CFQ_PRIO_LISTS		IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)	((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
@@ -2707,6 +2706,13 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	cfq_put_queue(cfqq);
 }
 
+static void cfq_init_icq(struct io_cq *icq)
+{
+	struct cfq_io_cq *cic = icq_to_cic(icq);
+
+	cic->ttime.last_end_request = jiffies;
+}
+
 static void cfq_exit_icq(struct io_cq *icq)
 {
 	struct cfq_io_cq *cic = icq_to_cic(icq);
@@ -2723,21 +2729,6 @@ static void cfq_exit_icq(struct io_cq *icq)
 	}
 }
 
-static struct cfq_io_cq *cfq_alloc_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
-{
-	struct cfq_io_cq *cic;
-
-	cic = kmem_cache_alloc_node(cfq_icq_pool, gfp_mask | __GFP_ZERO,
-							cfqd->queue->node);
-	if (cic) {
-		cic->ttime.last_end_request = jiffies;
-		INIT_LIST_HEAD(&cic->icq.q_node);
-		INIT_HLIST_NODE(&cic->icq.ioc_node);
-	}
-
-	return cic;
-}
-
 static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 {
 	struct task_struct *tsk = current;
@@ -2945,64 +2936,62 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
 }
 
 /**
- * cfq_create_cic - create and link a cfq_io_cq
- * @cfqd: cfqd of interest
+ * ioc_create_icq - create and link io_cq
+ * @q: request_queue of interest
  * @gfp_mask: allocation mask
  *
- * Make sure cfq_io_cq linking %current->io_context and @cfqd exists.  If
- * ioc and/or cic doesn't exist, they will be created using @gfp_mask.
+ * Make sure io_cq linking %current->io_context and @q exists.  If either
+ * io_context and/or icq don't exist, they will be created using @gfp_mask.
+ *
+ * The caller is responsible for ensuring @ioc won't go away and @q is
+ * alive and will stay alive until this function returns.
  */
-static int cfq_create_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
+static struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
 {
-	struct request_queue *q = cfqd->queue;
-	struct io_cq *icq = NULL;
-	struct cfq_io_cq *cic;
+	struct elevator_type *et = q->elevator->type;
 	struct io_context *ioc;
-	int ret = -ENOMEM;
-
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	struct io_cq *icq;
 
 	/* allocate stuff */
 	ioc = create_io_context(current, gfp_mask, q->node);
 	if (!ioc)
-		goto out;
+		return NULL;
 
-	cic = cfq_alloc_cic(cfqd, gfp_mask);
-	if (!cic)
-		goto out;
-	icq = &cic->icq;
+	icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
+				    q->node);
+	if (!icq)
+		return NULL;
 
-	ret = radix_tree_preload(gfp_mask);
-	if (ret)
-		goto out;
+	if (radix_tree_preload(gfp_mask) < 0) {
+		kmem_cache_free(et->icq_cache, icq);
+		return NULL;
+	}
 
 	icq->ioc = ioc;
-	icq->q = cfqd->queue;
+	icq->q = q;
+	INIT_LIST_HEAD(&icq->q_node);
+	INIT_HLIST_NODE(&icq->ioc_node);
 
 	/* lock both q and ioc and try to link @icq */
 	spin_lock_irq(q->queue_lock);
 	spin_lock(&ioc->lock);
 
-	ret = radix_tree_insert(&ioc->icq_tree, q->id, icq);
-	if (likely(!ret)) {
+	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
 		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
 		list_add(&icq->q_node, &q->icq_list);
-		icq = NULL;
-	} else if (ret == -EEXIST) {
-		/* someone else already did it */
-		ret = 0;
+		if (et->ops.elevator_init_icq_fn)
+			et->ops.elevator_init_icq_fn(icq);
+	} else {
+		kmem_cache_free(et->icq_cache, icq);
+		icq = ioc_lookup_icq(ioc, q);
+		if (!icq)
+			printk(KERN_ERR "cfq: icq link failed!\n");
 	}
 
 	spin_unlock(&ioc->lock);
 	spin_unlock_irq(q->queue_lock);
-
 	radix_tree_preload_end();
-out:
-	if (ret)
-		printk(KERN_ERR "cfq: icq link failed!\n");
-	if (icq)
-		kmem_cache_free(cfq_icq_pool, icq);
-	return ret;
+	return icq;
 }
 
 /**
@@ -3022,7 +3011,6 @@ static struct cfq_io_cq *cfq_get_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 	struct request_queue *q = cfqd->queue;
 	struct cfq_io_cq *cic = NULL;
 	struct io_context *ioc;
-	int err;
 
 	lockdep_assert_held(q->queue_lock);
 
@@ -3037,9 +3025,9 @@ static struct cfq_io_cq *cfq_get_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
 
 		/* slow path - unlock, create missing ones and retry */
 		spin_unlock_irq(q->queue_lock);
-		err = cfq_create_cic(cfqd, gfp_mask);
+		cic = icq_to_cic(ioc_create_icq(q, gfp_mask));
 		spin_lock_irq(q->queue_lock);
-		if (err)
+		if (!cic)
 			return NULL;
 	}
 
@@ -3975,6 +3963,7 @@ static struct elevator_type iosched_cfq = {
 		.elevator_completed_req_fn =	cfq_completed_request,
 		.elevator_former_req_fn =	elv_rb_former_request,
 		.elevator_latter_req_fn =	elv_rb_latter_request,
+		.elevator_init_icq_fn =		cfq_init_icq,
 		.elevator_exit_icq_fn =		cfq_exit_icq,
 		.elevator_set_req_fn =		cfq_set_request,
 		.elevator_put_req_fn =		cfq_put_request,
@@ -4028,7 +4017,6 @@ static int __init cfq_init(void)
 		kmem_cache_destroy(cfq_pool);
 		return ret;
 	}
-	cfq_icq_pool = iosched_cfq.icq_cache;
 
 	blkio_policy_register(&blkio_policy_cfq);
 
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 06e4dd568717..c8f1e67a8ebe 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -26,6 +26,7 @@ typedef struct request *(elevator_request_list_fn) (struct request_queue *, stru
 typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *);
 typedef int (elevator_may_queue_fn) (struct request_queue *, int);
 
+typedef void (elevator_init_icq_fn) (struct io_cq *);
 typedef void (elevator_exit_icq_fn) (struct io_cq *);
 typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, gfp_t);
 typedef void (elevator_put_req_fn) (struct request *);
@@ -59,6 +60,7 @@ struct elevator_ops
 	elevator_request_list_fn *elevator_former_req_fn;
 	elevator_request_list_fn *elevator_latter_req_fn;
 
+	elevator_init_icq_fn *elevator_init_icq_fn;
 	elevator_exit_icq_fn *elevator_exit_icq_fn;
 
 	elevator_set_req_fn *elevator_set_req_fn;
-- 
cgit v1.2.3


From f1f8cc94651738b418ba54c039df536303b91704 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 Dec 2011 00:33:42 +0100
Subject: block, cfq: move icq creation and rq->elv.icq association to block
 core

Now block layer knows everything necessary to create and associate
icq's with requests.  Move ioc_create_icq() to blk-ioc.c and update
get_request() such that, if elevator_type->icq_size is set, requests
are automatically associated with their matching icq's before
elv_set_request().  io_context reference is also managed by block core
on request alloc/free.

* Only ioprio/cgroup changed handling remains from cfq_get_cic().
  Collapsed into cfq_set_request().

* This removes queue kicking on icq allocation failure (for now).  As
  icq allocation failure is rare and the only effect of queue kicking
  achieved was possibly accelerating queue processing, this change
  shouldn't be noticeable.

  There is a larger underlying problem.  Unlike request allocation,
  icq allocation is not guaranteed to succeed eventually after
  retries.  The number of icq's is unbounded and thus mempool can't be the
  solution either.  This effectively adds allocation dependency on
  memory free path and thus possibility of deadlock.

  This usually wouldn't happen because icq allocation is not a hot
  path and, even when the condition triggers, it's highly unlikely
  that none of the writeback workers already has icq.

  However, this is still possible especially if elevator is being
  switched under high memory pressure, so we better get it fixed.
  Probably the only solution is just bypassing elevator and appending
  to dispatch queue on any elevator allocation failure.

* Comment added to explain how icq's are managed and synchronized.

This completes cleanup of io_context interface.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c          |  46 +++++++++++++---
 block/blk-ioc.c           |  60 ++++++++++++++++++++-
 block/blk.h               |   1 +
 block/cfq-iosched.c       | 135 ++++------------------------------------------
 include/linux/elevator.h  |   8 +--
 include/linux/iocontext.h |  59 ++++++++++++++++++++
 6 files changed, 173 insertions(+), 136 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 3c26c7f48703..8fbdac7010bb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -640,13 +640,18 @@ EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(struct request_queue *q, struct request *rq)
 {
-	if (rq->cmd_flags & REQ_ELVPRIV)
+	if (rq->cmd_flags & REQ_ELVPRIV) {
 		elv_put_request(q, rq);
+		if (rq->elv.icq)
+			put_io_context(rq->elv.icq->ioc, q);
+	}
+
 	mempool_free(rq, q->rq.rq_pool);
 }
 
 static struct request *
-blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
+blk_alloc_request(struct request_queue *q, struct io_cq *icq,
+		  unsigned int flags, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -657,10 +662,15 @@ blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
 
 	rq->cmd_flags = flags | REQ_ALLOCED;
 
-	if ((flags & REQ_ELVPRIV) &&
-	    unlikely(elv_set_request(q, rq, gfp_mask))) {
-		mempool_free(rq, q->rq.rq_pool);
-		return NULL;
+	if (flags & REQ_ELVPRIV) {
+		rq->elv.icq = icq;
+		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
+			mempool_free(rq, q->rq.rq_pool);
+			return NULL;
+		}
+		/* @rq->elv.icq holds on to io_context until @rq is freed */
+		if (icq)
+			get_io_context(icq->ioc);
 	}
 
 	return rq;
@@ -772,11 +782,14 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
+	struct elevator_type *et;
 	struct io_context *ioc;
+	struct io_cq *icq = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
 	bool retried = false;
 	int may_queue;
 retry:
+	et = q->elevator->type;
 	ioc = current->io_context;
 
 	if (unlikely(blk_queue_dead(q)))
@@ -837,17 +850,36 @@ retry:
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 
+	/*
+	 * Decide whether the new request will be managed by elevator.  If
+	 * so, mark @rw_flags and increment elvpriv.  Non-zero elvpriv will
+	 * prevent the current elevator from being destroyed until the new
+	 * request is freed.  This guarantees icq's won't be destroyed and
+	 * makes creating new ones safe.
+	 *
+	 * Also, lookup icq while holding queue_lock.  If it doesn't exist,
+	 * it will be created after releasing queue_lock.
+	 */
 	if (blk_rq_should_init_elevator(bio) &&
 	    !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
 		rw_flags |= REQ_ELVPRIV;
 		rl->elvpriv++;
+		if (et->icq_cache && ioc)
+			icq = ioc_lookup_icq(ioc, q);
 	}
 
 	if (blk_queue_io_stat(q))
 		rw_flags |= REQ_IO_STAT;
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw_flags, gfp_mask);
+	/* create icq if missing */
+	if (unlikely(et->icq_cache && !icq))
+		icq = ioc_create_icq(q, gfp_mask);
+
+	/* rqs are guaranteed to have icq on elv_set_request() if requested */
+	if (likely(!et->icq_cache || icq))
+		rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
+
 	if (unlikely(!rq)) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 0910a5584d38..c04d16b02225 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -289,7 +289,6 @@ void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_flags,
 		kmem_cache_free(iocontext_cachep, ioc);
 	task_unlock(task);
 }
-EXPORT_SYMBOL(create_io_context_slowpath);
 
 /**
  * get_task_io_context - get io_context of a task
@@ -362,6 +361,65 @@ out:
 }
 EXPORT_SYMBOL(ioc_lookup_icq);
 
+/**
+ * ioc_create_icq - create and link io_cq
+ * @q: request_queue of interest
+ * @gfp_mask: allocation mask
+ *
+ * Make sure io_cq linking %current->io_context and @q exists.  If either
+ * io_context and/or icq don't exist, they will be created using @gfp_mask.
+ *
+ * The caller is responsible for ensuring @ioc won't go away and @q is
+ * alive and will stay alive until this function returns.
+ */
+struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
+{
+	struct elevator_type *et = q->elevator->type;
+	struct io_context *ioc;
+	struct io_cq *icq;
+
+	/* allocate stuff */
+	ioc = create_io_context(current, gfp_mask, q->node);
+	if (!ioc)
+		return NULL;
+
+	icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
+				    q->node);
+	if (!icq)
+		return NULL;
+
+	if (radix_tree_preload(gfp_mask) < 0) {
+		kmem_cache_free(et->icq_cache, icq);
+		return NULL;
+	}
+
+	icq->ioc = ioc;
+	icq->q = q;
+	INIT_LIST_HEAD(&icq->q_node);
+	INIT_HLIST_NODE(&icq->ioc_node);
+
+	/* lock both q and ioc and try to link @icq */
+	spin_lock_irq(q->queue_lock);
+	spin_lock(&ioc->lock);
+
+	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
+		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
+		list_add(&icq->q_node, &q->icq_list);
+		if (et->ops.elevator_init_icq_fn)
+			et->ops.elevator_init_icq_fn(icq);
+	} else {
+		kmem_cache_free(et->icq_cache, icq);
+		icq = ioc_lookup_icq(ioc, q);
+		if (!icq)
+			printk(KERN_ERR "cfq: icq link failed!\n");
+	}
+
+	spin_unlock(&ioc->lock);
+	spin_unlock_irq(q->queue_lock);
+	radix_tree_preload_end();
+	return icq;
+}
+
 void ioc_set_changed(struct io_context *ioc, int which)
 {
 	struct io_cq *icq;
diff --git a/block/blk.h b/block/blk.h
index ed4d9bf2ab16..7efd772336de 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -200,6 +200,7 @@ static inline int blk_do_io_stat(struct request *rq)
  */
 void get_io_context(struct io_context *ioc);
 struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q);
+struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask);
 void ioc_clear_queue(struct request_queue *q);
 
 void create_io_context_slowpath(struct task_struct *task, gfp_t gfp_mask,
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 11f49d036845..f3b44c394e6d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2935,117 +2935,6 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
 	return cfqq;
 }
 
-/**
- * ioc_create_icq - create and link io_cq
- * @q: request_queue of interest
- * @gfp_mask: allocation mask
- *
- * Make sure io_cq linking %current->io_context and @q exists.  If either
- * io_context and/or icq don't exist, they will be created using @gfp_mask.
- *
- * The caller is responsible for ensuring @ioc won't go away and @q is
- * alive and will stay alive until this function returns.
- */
-static struct io_cq *ioc_create_icq(struct request_queue *q, gfp_t gfp_mask)
-{
-	struct elevator_type *et = q->elevator->type;
-	struct io_context *ioc;
-	struct io_cq *icq;
-
-	/* allocate stuff */
-	ioc = create_io_context(current, gfp_mask, q->node);
-	if (!ioc)
-		return NULL;
-
-	icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
-				    q->node);
-	if (!icq)
-		return NULL;
-
-	if (radix_tree_preload(gfp_mask) < 0) {
-		kmem_cache_free(et->icq_cache, icq);
-		return NULL;
-	}
-
-	icq->ioc = ioc;
-	icq->q = q;
-	INIT_LIST_HEAD(&icq->q_node);
-	INIT_HLIST_NODE(&icq->ioc_node);
-
-	/* lock both q and ioc and try to link @icq */
-	spin_lock_irq(q->queue_lock);
-	spin_lock(&ioc->lock);
-
-	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
-		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
-		list_add(&icq->q_node, &q->icq_list);
-		if (et->ops.elevator_init_icq_fn)
-			et->ops.elevator_init_icq_fn(icq);
-	} else {
-		kmem_cache_free(et->icq_cache, icq);
-		icq = ioc_lookup_icq(ioc, q);
-		if (!icq)
-			printk(KERN_ERR "cfq: icq link failed!\n");
-	}
-
-	spin_unlock(&ioc->lock);
-	spin_unlock_irq(q->queue_lock);
-	radix_tree_preload_end();
-	return icq;
-}
-
-/**
- * cfq_get_cic - acquire cfq_io_cq and bump refcnt on io_context
- * @cfqd: cfqd to setup cic for
- * @gfp_mask: allocation mask
- *
- * Return cfq_io_cq associating @cfqd and %current->io_context and
- * bump refcnt on io_context.  If ioc or cic doesn't exist, they're created
- * using @gfp_mask.
- *
- * Must be called under queue_lock which may be released and re-acquired.
- * This function also may sleep depending on @gfp_mask.
- */
-static struct cfq_io_cq *cfq_get_cic(struct cfq_data *cfqd, gfp_t gfp_mask)
-{
-	struct request_queue *q = cfqd->queue;
-	struct cfq_io_cq *cic = NULL;
-	struct io_context *ioc;
-
-	lockdep_assert_held(q->queue_lock);
-
-	while (true) {
-		/* fast path */
-		ioc = current->io_context;
-		if (likely(ioc)) {
-			cic = cfq_cic_lookup(cfqd, ioc);
-			if (likely(cic))
-				break;
-		}
-
-		/* slow path - unlock, create missing ones and retry */
-		spin_unlock_irq(q->queue_lock);
-		cic = icq_to_cic(ioc_create_icq(q, gfp_mask));
-		spin_lock_irq(q->queue_lock);
-		if (!cic)
-			return NULL;
-	}
-
-	/* bump @ioc's refcnt and handle changed notifications */
-	get_io_context(ioc);
-
-	if (unlikely(cic->icq.changed)) {
-		if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed))
-			changed_ioprio(cic);
-#ifdef CONFIG_CFQ_GROUP_IOSCHED
-		if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed))
-			changed_cgroup(cic);
-#endif
-	}
-
-	return cic;
-}
-
 static void
 __cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
 {
@@ -3524,8 +3413,6 @@ static void cfq_put_request(struct request *rq)
 		BUG_ON(!cfqq->allocated[rw]);
 		cfqq->allocated[rw]--;
 
-		put_io_context(RQ_CIC(rq)->icq.ioc, cfqq->cfqd->queue);
-
 		/* Put down rq reference on cfqg */
 		cfq_put_cfqg(RQ_CFQG(rq));
 		rq->elv.priv[0] = NULL;
@@ -3574,7 +3461,7 @@ static int
 cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_io_cq *cic;
+	struct cfq_io_cq *cic = icq_to_cic(rq->elv.icq);
 	const int rw = rq_data_dir(rq);
 	const bool is_sync = rq_is_sync(rq);
 	struct cfq_queue *cfqq;
@@ -3582,9 +3469,16 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
 	spin_lock_irq(q->queue_lock);
-	cic = cfq_get_cic(cfqd, gfp_mask);
-	if (!cic)
-		goto queue_fail;
+
+	/* handle changed notifications */
+	if (unlikely(cic->icq.changed)) {
+		if (test_and_clear_bit(ICQ_IOPRIO_CHANGED, &cic->icq.changed))
+			changed_ioprio(cic);
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+		if (test_and_clear_bit(ICQ_CGROUP_CHANGED, &cic->icq.changed))
+			changed_cgroup(cic);
+#endif
+	}
 
 new_queue:
 	cfqq = cic_to_cfqq(cic, is_sync);
@@ -3615,17 +3509,10 @@ new_queue:
 	cfqq->allocated[rw]++;
 
 	cfqq->ref++;
-	rq->elv.icq = &cic->icq;
 	rq->elv.priv[0] = cfqq;
 	rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg);
 	spin_unlock_irq(q->queue_lock);
 	return 0;
-
-queue_fail:
-	cfq_schedule_dispatch(cfqd);
-	spin_unlock_irq(q->queue_lock);
-	cfq_log(cfqd, "set_request fail");
-	return 1;
 }
 
 static void cfq_kick_queue(struct work_struct *work)
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index c8f1e67a8ebe..c24f3d7fbf1e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -60,8 +60,8 @@ struct elevator_ops
 	elevator_request_list_fn *elevator_former_req_fn;
 	elevator_request_list_fn *elevator_latter_req_fn;
 
-	elevator_init_icq_fn *elevator_init_icq_fn;
-	elevator_exit_icq_fn *elevator_exit_icq_fn;
+	elevator_init_icq_fn *elevator_init_icq_fn;	/* see iocontext.h */
+	elevator_exit_icq_fn *elevator_exit_icq_fn;	/* ditto */
 
 	elevator_set_req_fn *elevator_set_req_fn;
 	elevator_put_req_fn *elevator_put_req_fn;
@@ -90,8 +90,8 @@ struct elevator_type
 
 	/* fields provided by elevator implementation */
 	struct elevator_ops ops;
-	size_t icq_size;
-	size_t icq_align;
+	size_t icq_size;	/* see iocontext.h */
+	size_t icq_align;	/* ditto */
 	struct elv_fs_entry *elevator_attrs;
 	char elevator_name[ELV_NAME_MAX];
 	struct module *elevator_owner;
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index ac390a34c0e7..7e1371c4bccf 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -10,6 +10,65 @@ enum {
 	ICQ_CGROUP_CHANGED,
 };
 
+/*
+ * An io_cq (icq) is association between an io_context (ioc) and a
+ * request_queue (q).  This is used by elevators which need to track
+ * information per ioc - q pair.
+ *
+ * Elevator can request use of icq by setting elevator_type->icq_size and
+ * ->icq_align.  Both size and align must be larger than that of struct
+ * io_cq and elevator can use the tail area for private information.  The
+ * recommended way to do this is defining a struct which contains io_cq as
+ * the first member followed by private members and using its size and
+ * align.  For example,
+ *
+ *	struct snail_io_cq {
+ *		struct io_cq	icq;
+ *		int		poke_snail;
+ *		int		feed_snail;
+ *	};
+ *
+ *	struct elevator_type snail_elv_type {
+ *		.ops =		{ ... },
+ *		.icq_size =	sizeof(struct snail_io_cq),
+ *		.icq_align =	__alignof__(struct snail_io_cq),
+ *		...
+ *	};
+ *
+ * If icq_size is set, block core will manage icq's.  All requests will
+ * have its ->elv.icq field set before elevator_ops->elevator_set_req_fn()
+ * is called and be holding a reference to the associated io_context.
+ *
+ * Whenever a new icq is created, elevator_ops->elevator_init_icq_fn() is
+ * called and, on destruction, ->elevator_exit_icq_fn().  Both functions
+ * are called with both the associated io_context and queue locks held.
+ *
+ * Elevator is allowed to lookup icq using ioc_lookup_icq() while holding
+ * queue lock but the returned icq is valid only until the queue lock is
+ * released.  Elevators can not and should not try to create or destroy
+ * icq's.
+ *
+ * As icq's are linked from both ioc and q, the locking rules are a bit
+ * complex.
+ *
+ * - ioc lock nests inside q lock.
+ *
+ * - ioc->icq_list and icq->ioc_node are protected by ioc lock.
+ *   q->icq_list and icq->q_node by q lock.
+ *
+ * - ioc->icq_tree and ioc->icq_hint are protected by ioc lock, while icq
+ *   itself is protected by q lock.  However, both the indexes and icq
+ *   itself are also RCU managed and lookup can be performed holding only
+ *   the q lock.
+ *
+ * - icq's are not reference counted.  They are destroyed when either the
+ *   ioc or q goes away.  Each request with icq set holds an extra
+ *   reference to ioc to ensure it stays until the request is completed.
+ *
+ * - Linking and unlinking icq's are performed while holding both ioc and q
+ *   locks.  Due to the lock ordering, q exit is simple but ioc exit
+ *   requires reverse-order double lock dance.
+ */
 struct io_cq {
 	struct request_queue	*q;
 	struct io_context	*ioc;
-- 
cgit v1.2.3


From 175d6146738b3d04e1adcaa4a971a3b2b0dbd8af Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 28 Nov 2011 14:36:36 +0100
Subject: iommu/amd: Add invalid_ppr callback

This callback can be used to change the PRI response code
sent to a device when a PPR fault fails.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu_v2.c | 57 ++++++++++++++++++++++++++++++++++++++++++--
 include/linux/amd-iommu.h    | 34 +++++++++++++++++++++++---
 2 files changed, 86 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index abdb8396f89a..fe812e2a0474 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -62,6 +62,7 @@ struct device_state {
 	struct iommu_domain *domain;
 	int pasid_levels;
 	int max_pasids;
+	amd_iommu_invalid_ppr_cb inv_ppr_cb;
 	spinlock_t lock;
 	wait_queue_head_t wq;
 };
@@ -505,10 +506,31 @@ static void do_fault(struct work_struct *work)
 	npages = get_user_pages(fault->state->task, fault->state->mm,
 				fault->address, 1, write, 0, &page, NULL);
 
-	if (npages == 1)
+	if (npages == 1) {
 		put_page(page);
-	else
+	} else if (fault->dev_state->inv_ppr_cb) {
+		int status;
+
+		status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev,
+						      fault->pasid,
+						      fault->address,
+						      fault->flags);
+		switch (status) {
+		case AMD_IOMMU_INV_PRI_RSP_SUCCESS:
+			set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS);
+			break;
+		case AMD_IOMMU_INV_PRI_RSP_INVALID:
+			set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
+			break;
+		case AMD_IOMMU_INV_PRI_RSP_FAIL:
+			set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE);
+			break;
+		default:
+			BUG();
+		}
+	} else {
 		set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
+	}
 
 	finish_pri_tag(fault->dev_state, fault->state, fault->tag);
 
@@ -828,6 +850,37 @@ void amd_iommu_free_device(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL(amd_iommu_free_device);
 
+int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
+				 amd_iommu_invalid_ppr_cb cb)
+{
+	struct device_state *dev_state;
+	unsigned long flags;
+	u16 devid;
+	int ret;
+
+	if (!amd_iommu_v2_supported())
+		return -ENODEV;
+
+	devid = device_id(pdev);
+
+	spin_lock_irqsave(&state_lock, flags);
+
+	ret = -EINVAL;
+	dev_state = state_table[devid];
+	if (dev_state == NULL)
+		goto out_unlock;
+
+	dev_state->inv_ppr_cb = cb;
+
+	ret = 0;
+
+out_unlock:
+	spin_unlock_irqrestore(&state_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);
+
 static int __init amd_iommu_v2_init(void)
 {
 	size_t state_table_size;
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 23e21e15dfab..06688c42167d 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -28,9 +28,6 @@ struct task_struct;
 struct pci_dev;
 
 extern int amd_iommu_detect(void);
-extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
-				struct task_struct *task);
-extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
 
 
 /**
@@ -91,6 +88,37 @@ extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
  */
 extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
 
+/**
+ * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed
+ *				    PRI requests
+ * @pdev: The PCI device the call-back should be registered for
+ * @cb: The call-back function
+ *
+ * The IOMMUv2 driver invokes this call-back when it is unable to
+ * successfully handle a PRI request. The device driver can then decide
+ * which PRI response the device should see. Possible return values for
+ * the call-back are:
+ *
+ * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device
+ * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device
+ * - AMD_IOMMU_INV_PRI_RSP_FAIL    - Send Failure back to the device,
+ *				     the device is required to disable
+ *				     PRI when it receives this response
+ *
+ * The function returns 0 on success or negative value on error.
+ */
+#define AMD_IOMMU_INV_PRI_RSP_SUCCESS	0
+#define AMD_IOMMU_INV_PRI_RSP_INVALID	1
+#define AMD_IOMMU_INV_PRI_RSP_FAIL	2
+
+typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev,
+					int pasid,
+					unsigned long address,
+					u16);
+
+extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
+					amd_iommu_invalid_ppr_cb cb);
+
 #else
 
 static inline int amd_iommu_detect(void) { return -ENODEV; }
-- 
cgit v1.2.3


From 52efdb89d60a0f19949129a08af3437a7aab70be Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 7 Dec 2011 12:01:36 +0100
Subject: iommu/amd: Add amd_iommu_device_info() function

This function can be used to find out which features
necessary for IOMMUv2 usage are available on a given device.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/amd-iommu.h | 26 ++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index d5074f428423..03944e76b700 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3565,3 +3565,46 @@ void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum)
 	dev_data->errata |= (1 << erratum);
 }
 EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
+
+int amd_iommu_device_info(struct pci_dev *pdev,
+                          struct amd_iommu_device_info *info)
+{
+	int max_pasids;
+	int pos;
+
+	if (pdev == NULL || info == NULL)
+		return -EINVAL;
+
+	if (!amd_iommu_v2_supported())
+		return -EINVAL;
+
+	memset(info, 0, sizeof(*info));
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS);
+	if (pos)
+		info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
+	if (pos)
+		info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
+	if (pos) {
+		int features;
+
+		max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1));
+		max_pasids = min(max_pasids, (1 << 20));
+
+		info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
+		info->max_pasids = min(pci_max_pasids(pdev), max_pasids);
+
+		features = pci_pasid_features(pdev);
+		if (features & PCI_PASID_CAP_EXEC)
+			info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
+		if (features & PCI_PASID_CAP_PRIV)
+			info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(amd_iommu_device_info);
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 06688c42167d..c03c281ae6ee 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -119,6 +119,32 @@ typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev,
 extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
 					amd_iommu_invalid_ppr_cb cb);
 
+/**
+ * amd_iommu_device_info() - Get information about IOMMUv2 support of a
+ *			     PCI device
+ * @pdev: PCI device to query information from
+ * @info: A pointer to an amd_iommu_device_info structure which will contain
+ *	  the information about the PCI device
+ *
+ * Returns 0 on success, negative value on error
+ */
+
+#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP     0x1    /* ATS feature supported */
+#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP     0x2    /* PRI feature supported */
+#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP   0x4    /* PASID context supported */
+#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP    0x8    /* Device may request execution
+						    on memory pages */
+#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP   0x10    /* Device may request
+						    super-user privileges */
+
+struct amd_iommu_device_info {
+	int max_pasids;
+	u32 flags;
+};
+
+extern int amd_iommu_device_info(struct pci_dev *pdev,
+				 struct amd_iommu_device_info *info);
+
 #else
 
 static inline int amd_iommu_detect(void) { return -ENODEV; }
-- 
cgit v1.2.3


From bc21662f729cd17d2af93e149f4eccafc7b10181 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 7 Dec 2011 12:24:42 +0100
Subject: iommu/amd: Add invalidate-context call-back

This call-back is invoked when the task that is bound to a
pasid is about to exit. The driver can use it to shut down
all state related to that pasid context in a safe way.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/iommu/amd_iommu_v2.c | 35 +++++++++++++++++++++++++++++++++++
 include/linux/amd-iommu.h    | 17 +++++++++++++++++
 2 files changed, 52 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index fe812e2a0474..8add9f125d3e 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -63,6 +63,7 @@ struct device_state {
 	int pasid_levels;
 	int max_pasids;
 	amd_iommu_invalid_ppr_cb inv_ppr_cb;
+	amd_iommu_invalidate_ctx inv_ctx_cb;
 	spinlock_t lock;
 	wait_queue_head_t wq;
 };
@@ -637,6 +638,9 @@ again:
 		dev_state = pasid_state->device_state;
 		pasid     = pasid_state->pasid;
 
+		if (pasid_state->device_state->inv_ctx_cb)
+			dev_state->inv_ctx_cb(dev_state->pdev, pasid);
+
 		unbind_pasid(dev_state, pasid);
 
 		/* Task may be in the list multiple times */
@@ -881,6 +885,37 @@ out_unlock:
 }
 EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);
 
+int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
+				    amd_iommu_invalidate_ctx cb)
+{
+	struct device_state *dev_state;
+	unsigned long flags;
+	u16 devid;
+	int ret;
+
+	if (!amd_iommu_v2_supported())
+		return -ENODEV;
+
+	devid = device_id(pdev);
+
+	spin_lock_irqsave(&state_lock, flags);
+
+	ret = -EINVAL;
+	dev_state = state_table[devid];
+	if (dev_state == NULL)
+		goto out_unlock;
+
+	dev_state->inv_ctx_cb = cb;
+
+	ret = 0;
+
+out_unlock:
+	spin_unlock_irqrestore(&state_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);
+
 static int __init amd_iommu_v2_init(void)
 {
 	size_t state_table_size;
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index c03c281ae6ee..ef00610837d4 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -145,6 +145,23 @@ struct amd_iommu_device_info {
 extern int amd_iommu_device_info(struct pci_dev *pdev,
 				 struct amd_iommu_device_info *info);
 
+/**
+ * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating
+ *				       a pasid context. This call-back is
+ *				       invoked when the IOMMUv2 driver needs to
+ *				       invalidate a PASID context, for example
+ *				       because the task that is bound to that
+ *				       context is about to exit.
+ *
+ * @pdev: The PCI device the call-back should be registered for
+ * @cb: The call-back function
+ */
+
+typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, int pasid);
+
+extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
+					   amd_iommu_invalidate_ctx cb);
+
 #else
 
 static inline int amd_iommu_detect(void) { return -ENODEV; }
-- 
cgit v1.2.3


From 54848d73f9f254631303d6eab9b976855988b266 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Tue, 5 Apr 2011 13:21:19 -0600
Subject: writeback: charge leaked page dirties to active tasks

It's a years-old problem that a large number of short-lived dirtiers
(e.g. gcc instances in a fast kernel build) may starve long-running dirtiers
(e.g. dd) as well as push the dirty pages to the global hard limit.

The solution is to charge the pages dirtied by the exited gcc to other,
randomly chosen dirtying tasks. This is not perfect, but it should behave
well enough in practice: throttled tasks aren't actually running, so the
tasks that are running are more likely to pick up the leaked pages and get
throttled, which promotes an equal spread.

Randy: fix compile error: 'dirty_throttle_leaks' undeclared in exit.c

Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/writeback.h |  2 ++
 kernel/exit.c             |  3 +++
 mm/page-writeback.c       | 27 +++++++++++++++++++++++++++
 3 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a378c295851f..05eaf5e3aad7 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -7,6 +7,8 @@
 #include <linux/sched.h>
 #include <linux/fs.h>
 
+DECLARE_PER_CPU(int, dirty_throttle_leaks);
+
 /*
  * The 1/4 region under the global dirty thresh is for smooth dirty throttling:
  *
diff --git a/kernel/exit.c b/kernel/exit.c
index d0b7d988f873..d4aac24cc469 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -51,6 +51,7 @@
 #include <trace/events/sched.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/oom.h>
+#include <linux/writeback.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -1037,6 +1038,8 @@ NORET_TYPE void do_exit(long code)
 	validate_creds_for_do_exit(tsk);
 
 	preempt_disable();
+	if (tsk->nr_dirtied)
+		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
 	exit_rcu();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 50f08241f981..619c445fc03c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1214,6 +1214,22 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
 
 static DEFINE_PER_CPU(int, bdp_ratelimits);
 
+/*
+ * Normal tasks are throttled by
+ *	loop {
+ *		dirty tsk->nr_dirtied_pause pages;
+ *		take a snap in balance_dirty_pages();
+ *	}
+ * However there is a worst case. If every task exits immediately after
+ * dirtying (tsk->nr_dirtied_pause - 1) pages, balance_dirty_pages() will never
+ * be called to throttle the page dirties. The solution is to save the not yet
+ * throttled page dirties in dirty_throttle_leaks on task exit and charge them
+ * randomly into the running tasks. This works well for the above worst case,
+ * as the new task will pick up and accumulate the old task's leaked dirty
+ * count and eventually get throttled.
+ */
+DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
+
 /**
  * balance_dirty_pages_ratelimited_nr - balance dirty memory state
  * @mapping: address_space which was dirtied
@@ -1261,6 +1277,17 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
 			ratelimit = 0;
 		}
 	}
+	/*
+	 * Pick up the dirtied pages by the exited tasks. This avoids lots of
+	 * short-lived tasks (eg. gcc invocations in a kernel build) escaping
+	 * the dirty throttling and livelock other long-run dirtiers.
+	 */
+	p = &__get_cpu_var(dirty_throttle_leaks);
+	if (*p > 0 && current->nr_dirtied < ratelimit) {
+		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
+		*p -= nr_pages_dirtied;
+		current->nr_dirtied += nr_pages_dirtied;
+	}
 	preempt_enable();
 
 	if (unlikely(current->nr_dirtied >= ratelimit))
-- 
cgit v1.2.3


From 2f800fbd777b792de54187088df19a7df0251254 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Mon, 8 Aug 2011 15:22:00 -0600
Subject: writeback: fix dirtied pages accounting on redirty

De-account the accumulative dirty counters on page redirty.

Page redirties (very common in ext4) will introduce mismatch between
counters (a) and (b)

a) NR_DIRTIED, BDI_DIRTIED, tsk->nr_dirtied
b) NR_WRITTEN, BDI_WRITTEN

This will introduce systematic errors in balanced_rate and result in
dirty page position errors (ie. the dirty pages are no longer balanced
around the global/bdi setpoints).

Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/writeback.h |  2 ++
 mm/page-writeback.c       | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 05eaf5e3aad7..b30419cd425e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -197,6 +197,8 @@ void writeback_set_ratelimit(void);
 void tag_pages_for_writeback(struct address_space *mapping,
 			     pgoff_t start, pgoff_t end);
 
+void account_page_redirty(struct page *page);
+
 /* pdflush.c */
 extern int nr_pdflush_threads;	/* Global so it can be exported to sysctl
 				   read-only. */
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5d1ef5d8613a..96b3e7aa705c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1824,6 +1824,24 @@ int __set_page_dirty_nobuffers(struct page *page)
 }
 EXPORT_SYMBOL(__set_page_dirty_nobuffers);
 
+/*
+ * Call this whenever redirtying a page, to de-account the dirty counters
+ * (NR_DIRTIED, BDI_DIRTIED, tsk->nr_dirtied), so that they match the written
+ * counters (NR_WRITTEN, BDI_WRITTEN) in long term. The mismatches will lead to
+ * systematic errors in balanced_dirty_ratelimit and the dirty pages position
+ * control.
+ */
+void account_page_redirty(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	if (mapping && mapping_cap_account_dirty(mapping)) {
+		current->nr_dirtied--;
+		dec_zone_page_state(page, NR_DIRTIED);
+		dec_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
+	}
+}
+EXPORT_SYMBOL(account_page_redirty);
+
 /*
  * When a writepage implementation decides that it doesn't want to write this
  * page for some reason, it should redirty the locked page via
@@ -1832,6 +1850,7 @@ EXPORT_SYMBOL(__set_page_dirty_nobuffers);
 int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
 {
 	wbc->pages_skipped++;
+	account_page_redirty(page);
 	return __set_page_dirty_nobuffers(page);
 }
 EXPORT_SYMBOL(redirty_page_for_writepage);
-- 
cgit v1.2.3


From 83712358ba0a1497ce59a4f84ce4dd0f803fe6fc Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Sat, 11 Jun 2011 19:25:42 -0600
Subject: writeback: dirty ratelimit - think time compensation

Compensate the task's think time when computing the final pause time,
so that ->dirty_ratelimit can be executed accurately.

        think time := time spent outside of balance_dirty_pages()

In the rare case that the task slept longer than the 200ms period time
(resulting in a negative pause time), the sleep time will be compensated in
the following periods, too, if it's less than 1 second.

Accumulated errors are carefully avoided as long as the max pause area
is not hit.

Pseudo code:

        period = pages_dirtied / task_ratelimit;
        think = jiffies - dirty_paused_when;
        pause = period - think;

1) normal case: period > think

        pause = period - think
        dirty_paused_when = jiffies + pause
        nr_dirtied = 0

                             period time
              |===============================>|
                  think time      pause time
              |===============>|==============>|
        ------|----------------|---------------|------------------------
        dirty_paused_when   jiffies

2) no pause case: period <= think

        don't pause; reduce future pause time by:
        dirty_paused_when += period
        nr_dirtied = 0

                           period time
              |===============================>|
                                  think time
              |===================================================>|
        ------|--------------------------------+-------------------|----
        dirty_paused_when                                       jiffies

Acked-by: Jan Kara <jack@suse.cz>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/sched.h            |  1 +
 include/trace/events/writeback.h | 14 +++++++++++---
 kernel/fork.c                    |  1 +
 mm/page-writeback.c              | 36 ++++++++++++++++++++++++++++++++----
 4 files changed, 45 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1c4f3e9b9bc5..984c3b295978 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1527,6 +1527,7 @@ struct task_struct {
 	 */
 	int nr_dirtied;
 	int nr_dirtied_pause;
+	unsigned long dirty_paused_when; /* start of a write-and-pause period */
 
 #ifdef CONFIG_LATENCYTOP
 	int latency_record_count;
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 99d1d0decf88..8588a8918023 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -300,12 +300,13 @@ TRACE_EVENT(balance_dirty_pages,
 		 unsigned long dirty_ratelimit,
 		 unsigned long task_ratelimit,
 		 unsigned long dirtied,
+		 unsigned long period,
 		 long pause,
 		 unsigned long start_time),
 
 	TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
 		dirty_ratelimit, task_ratelimit,
-		dirtied, pause, start_time),
+		dirtied, period, pause, start_time),
 
 	TP_STRUCT__entry(
 		__array(	 char,	bdi, 32)
@@ -320,6 +321,8 @@ TRACE_EVENT(balance_dirty_pages,
 		__field(unsigned int,	dirtied_pause)
 		__field(unsigned long,	paused)
 		__field(	 long,	pause)
+		__field(unsigned long,	period)
+		__field(	 long,	think)
 	),
 
 	TP_fast_assign(
@@ -336,6 +339,9 @@ TRACE_EVENT(balance_dirty_pages,
 		__entry->task_ratelimit	= KBps(task_ratelimit);
 		__entry->dirtied	= dirtied;
 		__entry->dirtied_pause	= current->nr_dirtied_pause;
+		__entry->think		= current->dirty_paused_when == 0 ? 0 :
+			 (long)(jiffies - current->dirty_paused_when) * 1000/HZ;
+		__entry->period		= period * 1000 / HZ;
 		__entry->pause		= pause * 1000 / HZ;
 		__entry->paused		= (jiffies - start_time) * 1000 / HZ;
 	),
@@ -346,7 +352,7 @@ TRACE_EVENT(balance_dirty_pages,
 		  "bdi_setpoint=%lu bdi_dirty=%lu "
 		  "dirty_ratelimit=%lu task_ratelimit=%lu "
 		  "dirtied=%u dirtied_pause=%u "
-		  "paused=%lu pause=%ld",
+		  "paused=%lu pause=%ld period=%lu think=%ld",
 		  __entry->bdi,
 		  __entry->limit,
 		  __entry->setpoint,
@@ -358,7 +364,9 @@ TRACE_EVENT(balance_dirty_pages,
 		  __entry->dirtied,
 		  __entry->dirtied_pause,
 		  __entry->paused,	/* ms */
-		  __entry->pause	/* ms */
+		  __entry->pause,	/* ms */
+		  __entry->period,	/* ms */
+		  __entry->think	/* ms */
 	  )
 );
 
diff --git a/kernel/fork.c b/kernel/fork.c
index da4a6a10d088..f8668cf6a32d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1296,6 +1296,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	p->nr_dirtied = 0;
 	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
+	p->dirty_paused_when = 0;
 
 	/*
 	 * Ok, make it visible to the rest of the system.
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 96b3e7aa705c..491932155825 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1016,6 +1016,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
+	long period;
 	long pause = 0;
 	long uninitialized_var(max_pause);
 	bool dirty_exceeded = false;
@@ -1026,6 +1027,8 @@ static void balance_dirty_pages(struct address_space *mapping,
 	unsigned long start_time = jiffies;
 
 	for (;;) {
+		unsigned long now = jiffies;
+
 		/*
 		 * Unstable writes are a feature of certain networked
 		 * filesystems (i.e. NFS) in which data may have been
@@ -1045,8 +1048,11 @@ static void balance_dirty_pages(struct address_space *mapping,
 		 */
 		freerun = dirty_freerun_ceiling(dirty_thresh,
 						background_thresh);
-		if (nr_dirty <= freerun)
+		if (nr_dirty <= freerun) {
+			current->dirty_paused_when = now;
+			current->nr_dirtied = 0;
 			break;
+		}
 
 		if (unlikely(!writeback_in_progress(bdi)))
 			bdi_start_background_writeback(bdi);
@@ -1104,10 +1110,21 @@ static void balance_dirty_pages(struct address_space *mapping,
 		task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >>
 							RATELIMIT_CALC_SHIFT;
 		if (unlikely(task_ratelimit == 0)) {
+			period = max_pause;
 			pause = max_pause;
 			goto pause;
 		}
-		pause = HZ * pages_dirtied / task_ratelimit;
+		period = HZ * pages_dirtied / task_ratelimit;
+		pause = period;
+		if (current->dirty_paused_when)
+			pause -= now - current->dirty_paused_when;
+		/*
+		 * For less than 1s think time (ext3/4 may block the dirtier
+		 * for up to 800ms from time to time on 1-HDD; so does xfs,
+		 * however at much less frequency), try to compensate it in
+		 * future periods by updating the virtual time; otherwise just
+		 * do a reset, as it may be a light dirtier.
+		 */
 		if (unlikely(pause <= 0)) {
 			trace_balance_dirty_pages(bdi,
 						  dirty_thresh,
@@ -1118,8 +1135,16 @@ static void balance_dirty_pages(struct address_space *mapping,
 						  dirty_ratelimit,
 						  task_ratelimit,
 						  pages_dirtied,
+						  period,
 						  pause,
 						  start_time);
+			if (pause < -HZ) {
+				current->dirty_paused_when = now;
+				current->nr_dirtied = 0;
+			} else if (period) {
+				current->dirty_paused_when += period;
+				current->nr_dirtied = 0;
+			}
 			pause = 1; /* avoid resetting nr_dirtied_pause below */
 			break;
 		}
@@ -1135,11 +1160,15 @@ pause:
 					  dirty_ratelimit,
 					  task_ratelimit,
 					  pages_dirtied,
+					  period,
 					  pause,
 					  start_time);
 		__set_current_state(TASK_KILLABLE);
 		io_schedule_timeout(pause);
 
+		current->dirty_paused_when = now + pause;
+		current->nr_dirtied = 0;
+
 		/*
 		 * This is typically equal to (nr_dirty < dirty_thresh) and can
 		 * also keep "1000+ dd on a slow USB stick" under control.
@@ -1167,11 +1196,10 @@ pause:
 	if (!dirty_exceeded && bdi->dirty_exceeded)
 		bdi->dirty_exceeded = 0;
 
-	current->nr_dirtied = 0;
 	if (pause == 0) { /* in freerun area */
 		current->nr_dirtied_pause =
 				dirty_poll_interval(nr_dirty, dirty_thresh);
-	} else if (pause <= max_pause / 4 &&
+	} else if (period <= max_pause / 4 &&
 		   pages_dirtied >= current->nr_dirtied_pause) {
 		current->nr_dirtied_pause = clamp_val(
 					dirty_ratelimit * (max_pause / 2) / HZ,
-- 
cgit v1.2.3


From fb21c2f42879c05c76ea9e249b6905fc729f8529 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Dec 2011 14:02:26 +0100
Subject: fbdev: Add FOURCC-based format configuration API

This API will be used to support YUV frame buffer formats in a standard
way.

Last but not least, create a much needed fbdev API documentation and
document the format setting APIs.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
---
 Documentation/fb/api.txt | 306 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/video/fbmem.c    |  14 +++
 include/linux/fb.h       |  14 ++-
 3 files changed, 330 insertions(+), 4 deletions(-)
 create mode 100644 Documentation/fb/api.txt

(limited to 'include/linux')

diff --git a/Documentation/fb/api.txt b/Documentation/fb/api.txt
new file mode 100644
index 000000000000..d4ff7de85700
--- /dev/null
+++ b/Documentation/fb/api.txt
@@ -0,0 +1,306 @@
+			The Frame Buffer Device API
+			---------------------------
+
+Last revised: June 21, 2011
+
+
+0. Introduction
+---------------
+
+This document describes the frame buffer API used by applications to interact
+with frame buffer devices. In-kernel APIs between device drivers and the frame
+buffer core are not described.
+
+Due to a lack of documentation in the original frame buffer API, drivers
+behaviours differ in subtle (and not so subtle) ways. This document describes
+the recommended API implementation, but applications should be prepared to
+deal with different behaviours.
+
+
+1. Capabilities
+---------------
+
+Device and driver capabilities are reported in the fixed screen information
+capabilities field.
+
+struct fb_fix_screeninfo {
+	...
+	__u16 capabilities;		/* see FB_CAP_*			*/
+	...
+};
+
+Applications should use those capabilities to find out what features they can
+expect from the device and driver.
+
+- FB_CAP_FOURCC
+
+The driver supports the four character code (FOURCC) based format setting API.
+When supported, formats are configured using a FOURCC instead of manually
+specifying color components layout.
+
+
+2. Types and visuals
+--------------------
+
+Pixels are stored in memory in hardware-dependent formats. Applications need
+to be aware of the pixel storage format in order to write image data to the
+frame buffer memory in the format expected by the hardware.
+
+Formats are described by frame buffer types and visuals. Some visuals require
+additional information, which are stored in the variable screen information
+bits_per_pixel, grayscale, red, green, blue and transp fields.
+
+Visuals describe how color information is encoded and assembled to create
+macropixels. Types describe how macropixels are stored in memory. The following
+types and visuals are supported.
+
+- FB_TYPE_PACKED_PIXELS
+
+Macropixels are stored contiguously in a single plane. If the number of bits
+per macropixel is not a multiple of 8, whether macropixels are padded to the
+next multiple of 8 bits or packed together into bytes depends on the visual.
+
+Padding at end of lines may be present and is then reported through the fixed
+screen information line_length field.
+
+- FB_TYPE_PLANES
+
+Macropixels are split across multiple planes. The number of planes is equal to
+the number of bits per macropixel, with the i'th plane storing the i'th bit from
+all macropixels.
+
+Planes are located contiguously in memory.
+
+- FB_TYPE_INTERLEAVED_PLANES
+
+Macropixels are split across multiple planes. The number of planes is equal to
+the number of bits per macropixel, with the i'th plane storing the i'th bit from
+all macropixels.
+
+Planes are interleaved in memory. The interleave factor, defined as the
+distance in bytes between the beginning of two consecutive interleaved blocks
+belonging to different planes, is stored in the fixed screen information
+type_aux field.
+
+- FB_TYPE_FOURCC
+
+Macropixels are stored in memory as described by the format FOURCC identifier
+stored in the variable screen information grayscale field.
+
+- FB_VISUAL_MONO01
+
+Pixels are black or white and stored on a number of bits (typically one)
+specified by the variable screen information bits_per_pixel field.
+
+Black pixels are represented by all bits set to 1 and white pixels by all bits
+set to 0. When the number of bits per pixel is smaller than 8, several pixels
+are packed together in a byte.
+
+FB_VISUAL_MONO01 is currently used with FB_TYPE_PACKED_PIXELS only.
+
+- FB_VISUAL_MONO10
+
+Pixels are black or white and stored on a number of bits (typically one)
+specified by the variable screen information bits_per_pixel field.
+
+Black pixels are represented by all bits set to 0 and white pixels by all bits
+set to 1. When the number of bits per pixel is smaller than 8, several pixels
+are packed together in a byte.
+
+FB_VISUAL_MONO10 is currently used with FB_TYPE_PACKED_PIXELS only.
+
+- FB_VISUAL_TRUECOLOR
+
+Pixels are broken into red, green and blue components, and each component
+indexes a read-only lookup table for the corresponding value. Lookup tables
+are device-dependent, and provide linear or non-linear ramps.
+
+Each component is stored in a macropixel according to the variable screen
+information red, green, blue and transp fields.
+
+- FB_VISUAL_PSEUDOCOLOR and FB_VISUAL_STATIC_PSEUDOCOLOR
+
+Pixel values are encoded as indices into a colormap that stores red, green and
+blue components. The colormap is read-only for FB_VISUAL_STATIC_PSEUDOCOLOR
+and read-write for FB_VISUAL_PSEUDOCOLOR.
+
+Each pixel value is stored in the number of bits reported by the variable
+screen information bits_per_pixel field.
+
+- FB_VISUAL_DIRECTCOLOR
+
+Pixels are broken into red, green and blue components, and each component
+indexes a programmable lookup table for the corresponding value.
+
+Each component is stored in a macropixel according to the variable screen
+information red, green, blue and transp fields.
+
+- FB_VISUAL_FOURCC
+
+Pixels are encoded and interpreted as described by the format FOURCC
+identifier stored in the variable screen information grayscale field.
+
+
+3. Screen information
+---------------------
+
+Screen information is queried by applications using the FBIOGET_FSCREENINFO
+and FBIOGET_VSCREENINFO ioctls. Those ioctls take a pointer to a
+fb_fix_screeninfo and fb_var_screeninfo structure respectively.
+
+struct fb_fix_screeninfo stores device independent unchangeable information
+about the frame buffer device and the current format. This information can't
+be directly modified by applications, but can be changed by the driver when an
+application modifies the format.
+
+struct fb_fix_screeninfo {
+	char id[16];			/* identification string eg "TT Builtin" */
+	unsigned long smem_start;	/* Start of frame buffer mem */
+					/* (physical address) */
+	__u32 smem_len;			/* Length of frame buffer mem */
+	__u32 type;			/* see FB_TYPE_*		*/
+	__u32 type_aux;			/* Interleave for interleaved Planes */
+	__u32 visual;			/* see FB_VISUAL_*		*/
+	__u16 xpanstep;			/* zero if no hardware panning  */
+	__u16 ypanstep;			/* zero if no hardware panning  */
+	__u16 ywrapstep;		/* zero if no hardware ywrap    */
+	__u32 line_length;		/* length of a line in bytes    */
+	unsigned long mmio_start;	/* Start of Memory Mapped I/O   */
+					/* (physical address) */
+	__u32 mmio_len;			/* Length of Memory Mapped I/O  */
+	__u32 accel;			/* Indicate to driver which	*/
+					/*  specific chip/card we have	*/
+	__u16 capabilities;		/* see FB_CAP_*			*/
+	__u16 reserved[2];		/* Reserved for future compatibility */
+};
+
+struct fb_var_screeninfo stores device independent changeable information
+about a frame buffer device, its current format and video mode, as well as
+other miscellaneous parameters.
+
+struct fb_var_screeninfo {
+	__u32 xres;			/* visible resolution		*/
+	__u32 yres;
+	__u32 xres_virtual;		/* virtual resolution		*/
+	__u32 yres_virtual;
+	__u32 xoffset;			/* offset from virtual to visible */
+	__u32 yoffset;			/* resolution			*/
+
+	__u32 bits_per_pixel;		/* guess what			*/
+	__u32 grayscale;		/* 0 = color, 1 = grayscale,	*/
+					/* >1 = FOURCC			*/
+	struct fb_bitfield red;		/* bitfield in fb mem if true color, */
+	struct fb_bitfield green;	/* else only length is significant */
+	struct fb_bitfield blue;
+	struct fb_bitfield transp;	/* transparency			*/
+
+	__u32 nonstd;			/* != 0 Non standard pixel format */
+
+	__u32 activate;			/* see FB_ACTIVATE_*		*/
+
+	__u32 height;			/* height of picture in mm    */
+	__u32 width;			/* width of picture in mm     */
+
+	__u32 accel_flags;		/* (OBSOLETE) see fb_info.flags */
+
+	/* Timing: All values in pixclocks, except pixclock (of course) */
+	__u32 pixclock;			/* pixel clock in ps (pico seconds) */
+	__u32 left_margin;		/* time from sync to picture	*/
+	__u32 right_margin;		/* time from picture to sync	*/
+	__u32 upper_margin;		/* time from sync to picture	*/
+	__u32 lower_margin;
+	__u32 hsync_len;		/* length of horizontal sync	*/
+	__u32 vsync_len;		/* length of vertical sync	*/
+	__u32 sync;			/* see FB_SYNC_*		*/
+	__u32 vmode;			/* see FB_VMODE_*		*/
+	__u32 rotate;			/* angle we rotate counter clockwise */
+	__u32 colorspace;		/* colorspace for FOURCC-based modes */
+	__u32 reserved[4];		/* Reserved for future compatibility */
+};
+
+To modify variable information, applications call the FBIOPUT_VSCREENINFO
+ioctl with a pointer to a fb_var_screeninfo structure. If the call is
+successful, the driver will update the fixed screen information accordingly.
+
+Instead of filling the complete fb_var_screeninfo structure manually,
+applications should call the FBIOGET_VSCREENINFO ioctl and modify only the
+fields they care about.
+
+
+4. Format configuration
+-----------------------
+
+Frame buffer devices offer two ways to configure the frame buffer format: the
+legacy API and the FOURCC-based API.
+
+
+The legacy API has been the only frame buffer format configuration API for a
+long time and is thus widely used by applications. It is the recommended API
+for applications when using RGB and grayscale formats, as well as legacy
+non-standard formats.
+
+To select a format, applications set the fb_var_screeninfo bits_per_pixel field
+to the desired frame buffer depth. Values up to 8 will usually map to
+monochrome, grayscale or pseudocolor visuals, although this is not required.
+
+- For grayscale formats, applications set the grayscale field to one. The red,
+  blue, green and transp fields must be set to 0 by applications and ignored by
+  drivers. Drivers must fill the red, blue and green offsets to 0 and lengths
+  to the bits_per_pixel value.
+
+- For pseudocolor formats, applications set the grayscale field to zero. The
+  red, blue, green and transp fields must be set to 0 by applications and
+  ignored by drivers. Drivers must fill the red, blue and green offsets to 0
+  and lengths to the bits_per_pixel value.
+
+- For truecolor and directcolor formats, applications set the grayscale field
+  to zero, and the red, blue, green and transp fields to describe the layout of
+  color components in memory.
+
+struct fb_bitfield {
+	__u32 offset;			/* beginning of bitfield	*/
+	__u32 length;			/* length of bitfield		*/
+	__u32 msb_right;		/* != 0 : Most significant bit is */
+					/* right */
+};
+
+  Pixel values are bits_per_pixel wide and are split in non-overlapping red,
+  green, blue and alpha (transparency) components. Location and size of each
+  component in the pixel value are described by the fb_bitfield offset and
+  length fields. Offsets are computed from the right.
+
+  Pixels are always stored in an integer number of bytes. If the number of
+  bits per pixel is not a multiple of 8, pixel values are padded to the next
+  multiple of 8 bits.
+
+Upon successful format configuration, drivers update the fb_fix_screeninfo
+type, visual and line_length fields depending on the selected format.
+
+
+The FOURCC-based API replaces format descriptions by four character codes
+(FOURCC). FOURCCs are abstract identifiers that uniquely define a format
+without explicitly describing it. This is the only API that supports YUV
+formats. Drivers are also encouraged to implement the FOURCC-based API for RGB
+and grayscale formats.
+
+Drivers that support the FOURCC-based API report this capability by setting
+the FB_CAP_FOURCC bit in the fb_fix_screeninfo capabilities field.
+
+FOURCC definitions are located in the linux/videodev2.h header. However, and
+despite starting with the V4L2_PIX_FMT_ prefix, they are not restricted to V4L2
+and don't require usage of the V4L2 subsystem. FOURCC documentation is
+available in Documentation/DocBook/v4l/pixfmt.xml.
+
+To select a format, applications set the grayscale field to the desired FOURCC.
+For YUV formats, they should also select the appropriate colorspace by setting
+the colorspace field to one of the colorspaces listed in linux/videodev2.h and
+documented in Documentation/DocBook/v4l/colorspaces.xml.
+
+The red, green, blue and transp fields are not used with the FOURCC-based API.
+For forward compatibility reasons applications must zero those fields, and
+drivers must ignore them. Values other than 0 may get a meaning in future
+extensions.
+
+Upon successful format configuration, drivers update the fb_fix_screeninfo
+type, visual and line_length fields depending on the selected format. The type
+and visual fields are set to FB_TYPE_FOURCC and FB_VISUAL_FOURCC respectively.
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index ad936295d8f4..ac9141b85356 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -967,6 +967,20 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
 	    memcmp(&info->var, var, sizeof(struct fb_var_screeninfo))) {
 		u32 activate = var->activate;
 
+		/* When using FOURCC mode, make sure the red, green, blue and
+		 * transp fields are set to 0.
+		 */
+		if ((info->fix.capabilities & FB_CAP_FOURCC) &&
+		    var->grayscale > 1) {
+			if (var->red.offset     || var->green.offset    ||
+			    var->blue.offset    || var->transp.offset   ||
+			    var->red.length     || var->green.length    ||
+			    var->blue.length    || var->transp.length   ||
+			    var->red.msb_right  || var->green.msb_right ||
+			    var->blue.msb_right || var->transp.msb_right)
+				return -EINVAL;
+		}
+
 		if (!info->fbops->fb_check_var) {
 			*var = info->var;
 			goto done;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 1d6836c498dd..c18122f40543 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -45,6 +45,7 @@
 #define FB_TYPE_INTERLEAVED_PLANES	2	/* Interleaved planes	*/
 #define FB_TYPE_TEXT			3	/* Text/attributes	*/
 #define FB_TYPE_VGA_PLANES		4	/* EGA/VGA planes	*/
+#define FB_TYPE_FOURCC			5	/* Type identified by a V4L2 FOURCC */
 
 #define FB_AUX_TEXT_MDA		0	/* Monochrome text */
 #define FB_AUX_TEXT_CGA		1	/* CGA/EGA/VGA Color text */
@@ -69,6 +70,7 @@
 #define FB_VISUAL_PSEUDOCOLOR		3	/* Pseudo color (like atari) */
 #define FB_VISUAL_DIRECTCOLOR		4	/* Direct color */
 #define FB_VISUAL_STATIC_PSEUDOCOLOR	5	/* Pseudo color readonly */
+#define FB_VISUAL_FOURCC		6	/* Visual identified by a V4L2 FOURCC */
 
 #define FB_ACCEL_NONE		0	/* no hardware accelerator	*/
 #define FB_ACCEL_ATARIBLITT	1	/* Atari Blitter		*/
@@ -154,6 +156,8 @@
 
 #define FB_ACCEL_PUV3_UNIGFX	0xa0	/* PKUnity-v3 Unigfx		*/
 
+#define FB_CAP_FOURCC		1	/* Device supports FOURCC-based formats */
+
 struct fb_fix_screeninfo {
 	char id[16];			/* identification string eg "TT Builtin" */
 	unsigned long smem_start;	/* Start of frame buffer mem */
@@ -171,7 +175,8 @@ struct fb_fix_screeninfo {
 	__u32 mmio_len;			/* Length of Memory Mapped I/O  */
 	__u32 accel;			/* Indicate to driver which	*/
 					/*  specific chip/card we have	*/
-	__u16 reserved[3];		/* Reserved for future compatibility */
+	__u16 capabilities;		/* see FB_CAP_*			*/
+	__u16 reserved[2];		/* Reserved for future compatibility */
 };
 
 /* Interpretation of offset for color fields: All offsets are from the right,
@@ -246,8 +251,8 @@ struct fb_var_screeninfo {
 	__u32 yoffset;			/* resolution			*/
 
 	__u32 bits_per_pixel;		/* guess what			*/
-	__u32 grayscale;		/* != 0 Graylevels instead of colors */
-
+	__u32 grayscale;		/* 0 = color, 1 = grayscale,	*/
+					/* >1 = FOURCC			*/
 	struct fb_bitfield red;		/* bitfield in fb mem if true color, */
 	struct fb_bitfield green;	/* else only length is significant */
 	struct fb_bitfield blue;
@@ -273,7 +278,8 @@ struct fb_var_screeninfo {
 	__u32 sync;			/* see FB_SYNC_*		*/
 	__u32 vmode;			/* see FB_VMODE_*		*/
 	__u32 rotate;			/* angle we rotate counter clockwise */
-	__u32 reserved[5];		/* Reserved for future compatibility */
+	__u32 colorspace;		/* colorspace for FOURCC-based modes */
+	__u32 reserved[4];		/* Reserved for future compatibility */
 };
 
 struct fb_cmap {
-- 
cgit v1.2.3


From 0b9eabd77f4867232a9ac6ca54fa39607b0c9bc7 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Dec 2011 14:02:27 +0100
Subject: v4l: Add V4L2_PIX_FMT_NV24 and V4L2_PIX_FMT_NV42 formats

NV24 and NV42 are planar YCbCr 4:4:4 and YCrCb 4:4:4 formats with a
luma plane followed by an interleaved chroma plane.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
---
 Documentation/DocBook/media/v4l/pixfmt-nv24.xml | 121 ++++++++++++++++++++++++
 Documentation/DocBook/media/v4l/pixfmt.xml      |   1 +
 include/linux/videodev2.h                       |   2 +
 3 files changed, 124 insertions(+)
 create mode 100644 Documentation/DocBook/media/v4l/pixfmt-nv24.xml

(limited to 'include/linux')

diff --git a/Documentation/DocBook/media/v4l/pixfmt-nv24.xml b/Documentation/DocBook/media/v4l/pixfmt-nv24.xml
new file mode 100644
index 000000000000..fb255f2ca9dd
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-nv24.xml
@@ -0,0 +1,121 @@
+    <refentry>
+      <refmeta>
+	<refentrytitle>V4L2_PIX_FMT_NV24 ('NV24'), V4L2_PIX_FMT_NV42 ('NV42')</refentrytitle>
+	&manvol;
+      </refmeta>
+      <refnamediv>
+	<refname id="V4L2-PIX-FMT-NV24"><constant>V4L2_PIX_FMT_NV24</constant></refname>
+	<refname id="V4L2-PIX-FMT-NV42"><constant>V4L2_PIX_FMT_NV42</constant></refname>
+	<refpurpose>Formats with full horizontal and vertical
+chroma resolutions, also known as YUV 4:4:4. One luminance and one
+chrominance plane with alternating chroma samples as opposed to
+<constant>V4L2_PIX_FMT_YVU420</constant></refpurpose>
+      </refnamediv>
+      <refsect1>
+	<title>Description</title>
+
+	<para>These are two-plane versions of the YUV 4:4:4 format. The three
+	components are separated into two sub-images or planes. The Y plane is
+	first, with each Y sample stored in one byte per pixel. For
+	<constant>V4L2_PIX_FMT_NV24</constant>, a combined CbCr plane
+	immediately follows the Y plane in memory. The CbCr plane has the same
+	width and height, in pixels, as the Y plane (and the image). Each line
+	contains one CbCr pair per pixel, with each Cb and Cr sample stored in
+	one byte. <constant>V4L2_PIX_FMT_NV42</constant> is the same except that
+	the Cb and Cr samples are swapped, the CrCb plane starts with a Cr
+	sample.</para>
+
+	<para>If the Y plane has pad bytes after each row, then the CbCr plane
+	has twice as many pad bytes after its rows.</para>
+
+	<example>
+	  <title><constant>V4L2_PIX_FMT_NV24</constant> 4 &times; 4
+pixel image</title>
+
+	  <formalpara>
+	    <title>Byte Order.</title>
+	    <para>Each cell is one byte.
+		<informaltable frame="none">
+		<tgroup cols="9" align="center">
+		  <colspec align="left" colwidth="2*" />
+		  <tbody valign="top">
+		    <row>
+		      <entry>start&nbsp;+&nbsp;0:</entry>
+		      <entry>Y'<subscript>00</subscript></entry>
+		      <entry>Y'<subscript>01</subscript></entry>
+		      <entry>Y'<subscript>02</subscript></entry>
+		      <entry>Y'<subscript>03</subscript></entry>
+		    </row>
+		    <row>
+		      <entry>start&nbsp;+&nbsp;4:</entry>
+		      <entry>Y'<subscript>10</subscript></entry>
+		      <entry>Y'<subscript>11</subscript></entry>
+		      <entry>Y'<subscript>12</subscript></entry>
+		      <entry>Y'<subscript>13</subscript></entry>
+		    </row>
+		    <row>
+		      <entry>start&nbsp;+&nbsp;8:</entry>
+		      <entry>Y'<subscript>20</subscript></entry>
+		      <entry>Y'<subscript>21</subscript></entry>
+		      <entry>Y'<subscript>22</subscript></entry>
+		      <entry>Y'<subscript>23</subscript></entry>
+		    </row>
+		    <row>
+		      <entry>start&nbsp;+&nbsp;12:</entry>
+		      <entry>Y'<subscript>30</subscript></entry>
+		      <entry>Y'<subscript>31</subscript></entry>
+		      <entry>Y'<subscript>32</subscript></entry>
+		      <entry>Y'<subscript>33</subscript></entry>
+		    </row>
+		    <row>
+		      <entry>start&nbsp;+&nbsp;16:</entry>
+		      <entry>Cb<subscript>00</subscript></entry>
+		      <entry>Cr<subscript>00</subscript></entry>
+		      <entry>Cb<subscript>01</subscript></entry>
+		      <entry>Cr<subscript>01</subscript></entry>
+		      <entry>Cb<subscript>02</subscript></entry>
+		      <entry>Cr<subscript>02</subscript></entry>
+		      <entry>Cb<subscript>03</subscript></entry>
+		      <entry>Cr<subscript>03</subscript></entry>
+		    </row>
+		    <row>
+		      <entry>start&nbsp;+&nbsp;24:</entry>
+		      <entry>Cb<subscript>10</subscript></entry>
+		      <entry>Cr<subscript>10</subscript></entry>
+		      <entry>Cb<subscript>11</subscript></entry>
+		      <entry>Cr<subscript>11</subscript></entry>
+		      <entry>Cb<subscript>12</subscript></entry>
+		      <entry>Cr<subscript>12</subscript></entry>
+		      <entry>Cb<subscript>13</subscript></entry>
+		      <entry>Cr<subscript>13</subscript></entry>
+		    </row>
+		    <row>
+		      <entry>start&nbsp;+&nbsp;32:</entry>
+		      <entry>Cb<subscript>20</subscript></entry>
+		      <entry>Cr<subscript>20</subscript></entry>
+		      <entry>Cb<subscript>21</subscript></entry>
+		      <entry>Cr<subscript>21</subscript></entry>
+		      <entry>Cb<subscript>22</subscript></entry>
+		      <entry>Cr<subscript>22</subscript></entry>
+		      <entry>Cb<subscript>23</subscript></entry>
+		      <entry>Cr<subscript>23</subscript></entry>
+		    </row>
+		    <row>
+		      <entry>start&nbsp;+&nbsp;40:</entry>
+		      <entry>Cb<subscript>30</subscript></entry>
+		      <entry>Cr<subscript>30</subscript></entry>
+		      <entry>Cb<subscript>31</subscript></entry>
+		      <entry>Cr<subscript>31</subscript></entry>
+		      <entry>Cb<subscript>32</subscript></entry>
+		      <entry>Cr<subscript>32</subscript></entry>
+		      <entry>Cb<subscript>33</subscript></entry>
+		      <entry>Cr<subscript>33</subscript></entry>
+		    </row>
+		  </tbody>
+		</tgroup>
+		</informaltable>
+	      </para>
+	  </formalpara>
+	</example>
+      </refsect1>
+    </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml
index 2ff6b7776d7f..aef4615fb07b 100644
--- a/Documentation/DocBook/media/v4l/pixfmt.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt.xml
@@ -714,6 +714,7 @@ information.</para>
     &sub-nv12m;
     &sub-nv12mt;
     &sub-nv16;
+    &sub-nv24;
     &sub-m420;
   </section>
 
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 4b752d5ee80e..d2f74f8e3fe3 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -343,6 +343,8 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_NV21    v4l2_fourcc('N', 'V', '2', '1') /* 12  Y/CrCb 4:2:0  */
 #define V4L2_PIX_FMT_NV16    v4l2_fourcc('N', 'V', '1', '6') /* 16  Y/CbCr 4:2:2  */
 #define V4L2_PIX_FMT_NV61    v4l2_fourcc('N', 'V', '6', '1') /* 16  Y/CrCb 4:2:2  */
+#define V4L2_PIX_FMT_NV24    v4l2_fourcc('N', 'V', '2', '4') /* 24  Y/CbCr 4:4:4  */
+#define V4L2_PIX_FMT_NV42    v4l2_fourcc('N', 'V', '4', '2') /* 24  Y/CrCb 4:4:4  */
 
 /* two non contiguous planes - one Y, one Cr + Cb interleaved  */
 #define V4L2_PIX_FMT_NV12M   v4l2_fourcc('N', 'M', '1', '2') /* 12  Y/CbCr 4:2:0  */
-- 
cgit v1.2.3


From 114d6e9c103736487c967060d0a7aec9a7fce967 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 19 Dec 2011 11:32:56 -0800
Subject: security: update security_file_mmap() docs

This documents the fields added to security_file_mmap() that were
introduced in ed0321895182ffb6ecf210e066d87911b270d587.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 051d4e96cb1f..16bb52a65fa3 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -590,6 +590,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@reqprot contains the protection requested by the application.
  *	@prot contains the protection that will be applied by the kernel.
  *	@flags contains the operational flags.
+ *	@addr contains the virtual address that will be used for the operation.
+ *	@addr_only contains a boolean: 0 if file-backed VMA, otherwise 1.
  *	Return 0 if permission is granted.
  * @file_mprotect:
  *	Check permissions before changing memory access permissions.
-- 
cgit v1.2.3


From a597fa78d374c57dcf9a9bac02472a530cd7e60a Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Fri, 10 Jun 2011 12:23:30 -0300
Subject: [media] v4l: Add over-current and indicator flash fault bits

Flash controllers can report over-current and indicator fault
conditions. Define flash fault control bits for them.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Sakari Ailus <sakari.ailus@maxwell.research.nokia.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/v4l/controls.xml | 10 ++++++++++
 include/linux/videodev2.h                    |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml
index 9e72f077329a..c0422c622337 100644
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -3329,6 +3329,16 @@ interface and may change in the future.</para>
 		  <entry>The short circuit protection of the flash
 		  controller has been triggered.</entry>
 		</row>
+		<row>
+		  <entry><constant>V4L2_FLASH_FAULT_OVER_CURRENT</constant></entry>
+		  <entry>Current in the LED power supply has exceeded the limit
+		  specific to the flash controller.</entry>
+		</row>
+		<row>
+		  <entry><constant>V4L2_FLASH_FAULT_INDICATOR</constant></entry>
+		  <entry>The flash controller has detected a short or open
+		  circuit condition on the indicator LED.</entry>
+		</row>
 	      </tbody>
 	    </entrytbl>
 	  </row>
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 4b752d5ee80e..3d62631839bc 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1682,6 +1682,8 @@ enum v4l2_flash_strobe_source {
 #define V4L2_FLASH_FAULT_TIMEOUT		(1 << 1)
 #define V4L2_FLASH_FAULT_OVER_TEMPERATURE	(1 << 2)
 #define V4L2_FLASH_FAULT_SHORT_CIRCUIT		(1 << 3)
+#define V4L2_FLASH_FAULT_OVER_CURRENT		(1 << 4)
+#define V4L2_FLASH_FAULT_INDICATOR		(1 << 5)
 
 #define V4L2_CID_FLASH_CHARGE			(V4L2_CID_FLASH_CLASS_BASE + 11)
 #define V4L2_CID_FLASH_READY			(V4L2_CID_FLASH_CLASS_BASE + 12)
-- 
cgit v1.2.3


From bf3b84006e22ae241ec3d53dbe6c6d1f6ceddb56 Mon Sep 17 00:00:00 2001
From: Manu Abraham <abraham.manu@gmail.com>
Date: Sat, 17 Dec 2011 20:36:55 -0300
Subject: [media] DVB: Use a unique delivery system identifier for DVBC_ANNEX_C

Use a unique delivery system identifier for DVBC_ANNEX_C, just like any
other.

DVBC_ANNEX_A and DVBC_ANNEX_C have slightly different parameters
and are used in 2 geographically different locations.

Signed-off-by: Manu Abraham <abraham.manu@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/dvb/frontend.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index cb114f52ccf7..b2a939f8f1e2 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -337,7 +337,7 @@ typedef enum fe_rolloff {
 
 typedef enum fe_delivery_system {
 	SYS_UNDEFINED,
-	SYS_DVBC_ANNEX_AC,
+	SYS_DVBC_ANNEX_A,
 	SYS_DVBC_ANNEX_B,
 	SYS_DVBT,
 	SYS_DSS,
@@ -354,8 +354,13 @@ typedef enum fe_delivery_system {
 	SYS_DAB,
 	SYS_DVBT2,
 	SYS_TURBO,
+	SYS_DVBC_ANNEX_C,
 } fe_delivery_system_t;
 
+
+#define SYS_DVBC_ANNEX_AC	SYS_DVBC_ANNEX_A
+
+
 struct dtv_cmds_h {
 	char	*name;		/* A display name for debugging purposes */
 
-- 
cgit v1.2.3


From fd66c45dd51000ff444231a94ac15ccab8cffd3d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Sat, 17 Dec 2011 20:36:57 -0300
Subject: [media] Remove Annex A/C selection via roll-off factor

Instead of using a roll-off factor, change DRX-K & friends to select
the bandwidth filter and the Nyquist half roll-off via delivery system.

This provides a cleaner support for Annex A/C switch.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/common/tuners/xc5000.c       | 137 ++++++++++++-----------------
 drivers/media/dvb/dvb-core/dvb_frontend.c  |  25 +++++-
 drivers/media/dvb/frontends/drxk_hard.c    |  15 ++--
 drivers/media/dvb/frontends/tda18271c2dd.c |  44 ++++-----
 include/linux/dvb/frontend.h               |   2 -
 5 files changed, 106 insertions(+), 117 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/common/tuners/xc5000.c b/drivers/media/common/tuners/xc5000.c
index 97ad33896343..5c56d3cc030b 100644
--- a/drivers/media/common/tuners/xc5000.c
+++ b/drivers/media/common/tuners/xc5000.c
@@ -629,11 +629,13 @@ static void xc_debug_dump(struct xc5000_priv *priv)
 }
 
 static int xc5000_set_params(struct dvb_frontend *fe,
-	struct dvb_frontend_parameters *params)
+			     struct dvb_frontend_parameters *params)
 {
+	int ret, b;
 	struct xc5000_priv *priv = fe->tuner_priv;
-	int ret;
-	u32 bw;
+	u32 bw = fe->dtv_property_cache.bandwidth_hz;
+	u32 freq = fe->dtv_property_cache.frequency;
+	u32 delsys  = fe->dtv_property_cache.delivery_system;
 
 	if (xc5000_is_firmware_loaded(fe) != XC_RESULT_SUCCESS) {
 		if (xc_load_fw_and_init_tuner(fe) != XC_RESULT_SUCCESS) {
@@ -642,104 +644,77 @@ static int xc5000_set_params(struct dvb_frontend *fe,
 		}
 	}
 
-	dprintk(1, "%s() frequency=%d (Hz)\n", __func__, params->frequency);
+	dprintk(1, "%s() frequency=%d (Hz)\n", __func__, freq);
 
-	if (fe->ops.info.type == FE_ATSC) {
-		dprintk(1, "%s() ATSC\n", __func__);
-		switch (params->u.vsb.modulation) {
-		case VSB_8:
-		case VSB_16:
-			dprintk(1, "%s() VSB modulation\n", __func__);
-			priv->rf_mode = XC_RF_MODE_AIR;
-			priv->freq_hz = params->frequency - 1750000;
-			priv->bandwidth = BANDWIDTH_6_MHZ;
-			priv->video_standard = DTV6;
-			break;
-		case QAM_64:
-		case QAM_256:
-		case QAM_AUTO:
-			dprintk(1, "%s() QAM modulation\n", __func__);
-			priv->rf_mode = XC_RF_MODE_CABLE;
-			priv->freq_hz = params->frequency - 1750000;
-			priv->bandwidth = BANDWIDTH_6_MHZ;
-			priv->video_standard = DTV6;
-			break;
-		default:
-			return -EINVAL;
-		}
-	} else if (fe->ops.info.type == FE_OFDM) {
+	switch (delsys) {
+	case SYS_ATSC:
+		dprintk(1, "%s() VSB modulation\n", __func__);
+		priv->rf_mode = XC_RF_MODE_AIR;
+		priv->freq_hz = freq - 1750000;
+		priv->bandwidth = BANDWIDTH_6_MHZ;
+		priv->video_standard = DTV6;
+		break;
+	case SYS_DVBC_ANNEX_B:
+		dprintk(1, "%s() QAM modulation\n", __func__);
+		priv->rf_mode = XC_RF_MODE_CABLE;
+		priv->freq_hz = freq - 1750000;
+		priv->bandwidth = BANDWIDTH_6_MHZ;
+		priv->video_standard = DTV6;
+		break;
+	case SYS_DVBT:
+	case SYS_DVBT2:
 		dprintk(1, "%s() OFDM\n", __func__);
-		switch (params->u.ofdm.bandwidth) {
-		case BANDWIDTH_6_MHZ:
+		switch (bw) {
+		case 6000000:
 			priv->bandwidth = BANDWIDTH_6_MHZ;
 			priv->video_standard = DTV6;
-			priv->freq_hz = params->frequency - 1750000;
+			priv->freq_hz = freq - 1750000;
 			break;
-		case BANDWIDTH_7_MHZ:
+		case 7000000:
 			priv->bandwidth = BANDWIDTH_7_MHZ;
 			priv->video_standard = DTV7;
-			priv->freq_hz = params->frequency - 2250000;
+			priv->freq_hz = freq - 2250000;
 			break;
-		case BANDWIDTH_8_MHZ:
+		case 8000000:
 			priv->bandwidth = BANDWIDTH_8_MHZ;
 			priv->video_standard = DTV8;
-			priv->freq_hz = params->frequency - 2750000;
+			priv->freq_hz = freq - 2750000;
 			break;
 		default:
 			printk(KERN_ERR "xc5000 bandwidth not set!\n");
 			return -EINVAL;
 		}
 		priv->rf_mode = XC_RF_MODE_AIR;
-	} else if (fe->ops.info.type == FE_QAM) {
-		switch (params->u.qam.modulation) {
-		case QAM_256:
-		case QAM_AUTO:
-		case QAM_16:
-		case QAM_32:
-		case QAM_64:
-		case QAM_128:
-			dprintk(1, "%s() QAM modulation\n", __func__);
-			priv->rf_mode = XC_RF_MODE_CABLE;
-			/*
-			 * Using a higher bandwidth at the tuner filter may
-			 * allow inter-carrier interference.
-			 * So, determine the minimal channel spacing, in order
-			 * to better adjust the tuner filter.
-			 * According with ITU-T J.83, the bandwidth is given by:
-			 * bw = Simbol Rate * (1 + roll_off), where the roll_off
-			 * is equal to 0.15 for Annex A, and 0.13 for annex C
-			 */
-			if (fe->dtv_property_cache.rolloff == ROLLOFF_13)
-				bw = (params->u.qam.symbol_rate * 113) / 100;
-			else
-				bw = (params->u.qam.symbol_rate * 115) / 100;
-			if (bw <= 6000000) {
-				priv->bandwidth = BANDWIDTH_6_MHZ;
-				priv->video_standard = DTV6;
-				priv->freq_hz = params->frequency - 1750000;
-			} else if (bw <= 7000000) {
-				priv->bandwidth = BANDWIDTH_7_MHZ;
-				priv->video_standard = DTV7;
-				priv->freq_hz = params->frequency - 2250000;
-			} else {
-				priv->bandwidth = BANDWIDTH_8_MHZ;
-				priv->video_standard = DTV7_8;
-				priv->freq_hz = params->frequency - 2750000;
-			}
-			dprintk(1, "%s() Bandwidth %dMHz (%d)\n", __func__,
-				BANDWIDTH_6_MHZ ? 6: 8, bw);
-			break;
-		default:
-			dprintk(1, "%s() Unsupported QAM type\n", __func__);
-			return -EINVAL;
+	case SYS_DVBC_ANNEX_A:
+	case SYS_DVBC_ANNEX_C:
+		dprintk(1, "%s() QAM modulation\n", __func__);
+		priv->rf_mode = XC_RF_MODE_CABLE;
+		if (bw <= 6000000) {
+			priv->bandwidth = BANDWIDTH_6_MHZ;
+			priv->video_standard = DTV6;
+			priv->freq_hz = freq - 1750000;
+			b = 6;
+		} else if (bw <= 7000000) {
+			priv->bandwidth = BANDWIDTH_7_MHZ;
+			priv->video_standard = DTV7;
+			priv->freq_hz = freq - 2250000;
+			b = 7;
+		} else {
+			priv->bandwidth = BANDWIDTH_8_MHZ;
+			priv->video_standard = DTV7_8;
+			priv->freq_hz = freq - 2750000;
+			b = 8;
 		}
-	} else {
-		printk(KERN_ERR "xc5000 modulation type not supported!\n");
+		dprintk(1, "%s() Bandwidth %dMHz (%d)\n", __func__,
+			b, bw);
+		break;
+	default:
+		printk(KERN_ERR "xc5000: delivery system is not supported!\n");
 		return -EINVAL;
 	}
 
-	dprintk(1, "%s() frequency=%d (compensated)\n",
-		__func__, priv->freq_hz);
+	dprintk(1, "%s() frequency=%d (compensated to %d)\n",
+		__func__, freq, priv->freq_hz);
 
 	ret = xc_SetSignalSource(priv, priv->rf_mode);
 	if (ret != XC_RESULT_SUCCESS) {
diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c
index 821b2250ec70..66537b10132c 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.c
@@ -1011,7 +1011,7 @@ static void dtv_property_dump(struct dtv_property *tvp)
 
 static int is_legacy_delivery_system(fe_delivery_system_t s)
 {
-	if((s == SYS_UNDEFINED) || (s == SYS_DVBC_ANNEX_AC) ||
+	if((s == SYS_UNDEFINED) || (s == SYS_DVBC_ANNEX_A) ||
 	   (s == SYS_DVBC_ANNEX_B) || (s == SYS_DVBT) || (s == SYS_DVBS) ||
 	   (s == SYS_ATSC))
 		return 1;
@@ -1032,8 +1032,7 @@ static void dtv_property_cache_init(struct dvb_frontend *fe,
 		c->delivery_system = SYS_DVBS;
 		break;
 	case FE_QAM:
-		c->delivery_system = SYS_DVBC_ANNEX_AC;
-		c->rolloff = ROLLOFF_15; /* implied for Annex A */
+		c->delivery_system = SYS_DVBC_ANNEX_A;
 		break;
 	case FE_OFDM:
 		c->delivery_system = SYS_DVBT;
@@ -1144,9 +1143,10 @@ static void dtv_property_legacy_params_sync(struct dvb_frontend *fe)
  */
 static void dtv_property_adv_params_sync(struct dvb_frontend *fe)
 {
-	const struct dtv_frontend_properties *c = &fe->dtv_property_cache;
+	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
 	struct dvb_frontend_private *fepriv = fe->frontend_priv;
 	struct dvb_frontend_parameters *p = &fepriv->parameters_in;
+	u32 rolloff = 0;
 
 	p->frequency = c->frequency;
 	p->inversion = c->inversion;
@@ -1178,6 +1178,23 @@ static void dtv_property_adv_params_sync(struct dvb_frontend *fe)
 		else
 			p->u.ofdm.bandwidth = BANDWIDTH_AUTO;
 	}
+
+	/*
+	 * On DVB-C, the bandwidth is a function of roll-off and symbol rate.
+	 * The bandwidth is required for DVB-C tuners, in order to avoid
+	 * inter-channel noise. Instead of estimating the minimal required
+	 * bandwidth on every single driver, calculates it here and fills
+	 * it at the cache bandwidth parameter.
+	 * While not officially supported, a side effect of handling it at
+	 * the cache level is that a program could retrieve the bandwidth
+	 * via DTV_BANDWIDTH_HZ, which may be useful for test programs.
+	 */
+	if (c->delivery_system == SYS_DVBC_ANNEX_A)
+		rolloff = 115;
+	if (c->delivery_system == SYS_DVBC_ANNEX_C)
+		rolloff = 113;
+	if (rolloff)
+		c->bandwidth_hz = (c->symbol_rate * rolloff) / 100;
 }
 
 static void dtv_property_cache_submit(struct dvb_frontend *fe)
diff --git a/drivers/media/dvb/frontends/drxk_hard.c b/drivers/media/dvb/frontends/drxk_hard.c
index 038e470bf039..a2c819651933 100644
--- a/drivers/media/dvb/frontends/drxk_hard.c
+++ b/drivers/media/dvb/frontends/drxk_hard.c
@@ -6215,6 +6215,7 @@ static int drxk_set_parameters(struct dvb_frontend *fe,
 			       struct dvb_frontend_parameters *p)
 {
 	struct drxk_state *state = fe->demodulator_priv;
+	u32 delsys  = fe->dtv_property_cache.delivery_system;
 	u32 IF;
 
 	dprintk(1, "\n");
@@ -6225,11 +6226,15 @@ static int drxk_set_parameters(struct dvb_frontend *fe,
 		return -EINVAL;
 	}
 
-	if (fe->ops.info.type == FE_QAM) {
-		if (fe->dtv_property_cache.rolloff == ROLLOFF_13)
-			state->m_itut_annex_c = true;
-		else
-			state->m_itut_annex_c = false;
+	switch (delsys) {
+	case SYS_DVBC_ANNEX_A:
+		state->m_itut_annex_c = false;
+		break;
+	case SYS_DVBC_ANNEX_C:
+		state->m_itut_annex_c = true;
+		break;
+	default:
+		return -EINVAL;
 	}
 
 	if (fe->ops.i2c_gate_ctrl)
diff --git a/drivers/media/dvb/frontends/tda18271c2dd.c b/drivers/media/dvb/frontends/tda18271c2dd.c
index b66ca29704fc..0f8e9622bc96 100644
--- a/drivers/media/dvb/frontends/tda18271c2dd.c
+++ b/drivers/media/dvb/frontends/tda18271c2dd.c
@@ -1130,50 +1130,44 @@ static int set_params(struct dvb_frontend *fe,
 	struct tda_state *state = fe->tuner_priv;
 	int status = 0;
 	int Standard;
-	u32 bw;
+	u32 bw = fe->dtv_property_cache.bandwidth_hz;
+	u32 delsys  = fe->dtv_property_cache.delivery_system;
 
-	state->m_Frequency = params->frequency;
+	state->m_Frequency = fe->dtv_property_cache.frequency;
 
-	if (fe->ops.info.type == FE_OFDM)
-		switch (params->u.ofdm.bandwidth) {
-		case BANDWIDTH_6_MHZ:
+	switch (delsys) {
+	case  SYS_DVBT:
+	case  SYS_DVBT2:
+		switch (bw) {
+		case 6000000:
 			Standard = HF_DVBT_6MHZ;
 			break;
-		case BANDWIDTH_7_MHZ:
+		case 7000000:
 			Standard = HF_DVBT_7MHZ;
 			break;
-		default:
-		case BANDWIDTH_8_MHZ:
+		case 8000000:
 			Standard = HF_DVBT_8MHZ;
 			break;
+		default:
+			return -EINVAL;
 		}
-	else if (fe->ops.info.type == FE_QAM) {
-		/*
-		 * Using a higher bandwidth at the tuner filter may
-		 * allow inter-carrier interference.
-		 * So, determine the minimal channel spacing, in order
-		 * to better adjust the tuner filter.
-		 * According with ITU-T J.83, the bandwidth is given by:
-		 * bw = Simbol Rate * (1 + roll_off), where the roll_off
-		 * is equal to 0.15 for Annex A, and 0.13 for annex C
-		 */
-		if (fe->dtv_property_cache.rolloff == ROLLOFF_13)
-			bw = (params->u.qam.symbol_rate * 113) / 100;
-		else
-			bw = (params->u.qam.symbol_rate * 115) / 100;
+	case SYS_DVBC_ANNEX_A:
+	case SYS_DVBC_ANNEX_C:
 		if (bw <= 6000000)
 			Standard = HF_DVBC_6MHZ;
 		else if (bw <= 7000000)
 			Standard = HF_DVBC_7MHZ;
 		else
 			Standard = HF_DVBC_8MHZ;
-	} else
+	default:
 		return -EINVAL;
+	}
 	do {
-		status = RFTrackingFiltersCorrection(state, params->frequency);
+		status = RFTrackingFiltersCorrection(state, state->m_Frequency);
 		if (status < 0)
 			break;
-		status = ChannelConfiguration(state, params->frequency, Standard);
+		status = ChannelConfiguration(state, state->m_Frequency,
+					      Standard);
 		if (status < 0)
 			break;
 
diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index b2a939f8f1e2..a3c762383f88 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -331,8 +331,6 @@ typedef enum fe_rolloff {
 	ROLLOFF_20,
 	ROLLOFF_25,
 	ROLLOFF_AUTO,
-	ROLLOFF_15,	/* DVB-C Annex A */
-	ROLLOFF_13,	/* DVB-C Annex C */
 } fe_rolloff_t;
 
 typedef enum fe_delivery_system {
-- 
cgit v1.2.3


From 45959ee7aa645815a5ce303a0ea1e48a21e67c6a Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 12 Dec 2011 15:22:41 -0500
Subject: ftrace: Do not function trace inlined functions

When gcc inlines a function, it does not mark it with the mcount
prologue, which in turn means that inlined functions are not traced
by the function tracer. But if CONFIG_OPTIMIZE_INLINING is set, then
gcc is allowed not to inline a function that is marked inline.

Depending on the options and the compiler, a function may or may
not be traced by the function tracer, depending on whether gcc
decides to inline a function or not. This has caused several
problems in the past because gcc is not always consistent with
what it decides to inline between different gcc versions.

Some places should not be traced (like paravirt native_* functions)
and these are mostly marked as inline. When gcc decides not to
inline the function, and if that function should not be traced, then
the ftrace function tracer will suddenly break when it used to work
fine. This becomes even harder to debug when different versions of
gcc will not inline that function, making the same kernel and config
work for some gcc versions and not work for others.

By making all functions marked inline to not be traced will remove
the ambiguity that gcc adds when it comes to tracing functions marked
inline. All gcc versions will be consistent with what functions are
traced and having volatile working code will be removed.

Note, only the inline macro when CONFIG_OPTIMIZE_INLINING is set needs
to have notrace added, as the attribute __always_inline will force
the function to be inlined and then not traced.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/compiler-gcc.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 59e4028e833d..3fd17c249221 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -50,6 +50,11 @@
 # define inline		inline		__attribute__((always_inline))
 # define __inline__	__inline__	__attribute__((always_inline))
 # define __inline	__inline	__attribute__((always_inline))
+#else
+/* A lot of inline functions can cause havoc with function tracing */
+# define inline		inline		notrace
+# define __inline__	__inline__	notrace
+# define __inline	__inline	notrace
 #endif
 
 #define __deprecated			__attribute__((deprecated))
-- 
cgit v1.2.3


From c88fd8634ea68e74c7d19fd2621b4078fd22864c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 16 Aug 2011 09:53:39 -0400
Subject: ftrace: Allow archs to modify code without stop machine

The stop machine method to modify all functions in the kernel
(some 20,000 of them) is the safest way to do so across all archs.
But some archs may not need this big hammer approach to modify code
on SMP machines, and can simply just update the code it needs.

Adding a weak function arch_ftrace_update_code() that now does the
stop machine, will also let any arch override this method.

If the arch needs to check the system and then decide if it can
avoid stop machine, it can still call ftrace_run_stop_machine() to
use the old method.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  31 ++++++
 kernel/trace/ftrace.c  | 253 +++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 246 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 26eafcef75be..4f0b6fec379d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -133,6 +133,8 @@ struct ftrace_func_command {
 int ftrace_arch_code_modify_prepare(void);
 int ftrace_arch_code_modify_post_process(void);
 
+void ftrace_bug(int err, unsigned long ip);
+
 struct seq_file;
 
 struct ftrace_probe_ops {
@@ -190,6 +192,35 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
 int register_ftrace_command(struct ftrace_func_command *cmd);
 int unregister_ftrace_command(struct ftrace_func_command *cmd);
 
+enum {
+	FTRACE_UPDATE_CALLS		= (1 << 0),
+	FTRACE_DISABLE_CALLS		= (1 << 1),
+	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
+	FTRACE_START_FUNC_RET		= (1 << 3),
+	FTRACE_STOP_FUNC_RET		= (1 << 4),
+};
+
+enum {
+	FTRACE_UPDATE_IGNORE,
+	FTRACE_UPDATE_MAKE_CALL,
+	FTRACE_UPDATE_MAKE_NOP,
+};
+
+void arch_ftrace_update_code(int command);
+
+struct ftrace_rec_iter;
+
+struct ftrace_rec_iter *ftrace_rec_iter_start(void);
+struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter);
+struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
+
+int ftrace_update_record(struct dyn_ftrace *rec, int enable);
+int ftrace_test_record(struct dyn_ftrace *rec, int enable);
+void ftrace_run_stop_machine(int command);
+int ftrace_location(unsigned long ip);
+
+extern ftrace_func_t ftrace_trace_function;
+
 /* defined in arch */
 extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_dyn_arch_init(void *data);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 25b4f4da0fe8..655b432fb890 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -947,13 +947,6 @@ struct ftrace_func_probe {
 	struct rcu_head		rcu;
 };
 
-enum {
-	FTRACE_UPDATE_CALLS		= (1 << 0),
-	FTRACE_DISABLE_CALLS		= (1 << 1),
-	FTRACE_UPDATE_TRACE_FUNC	= (1 << 2),
-	FTRACE_START_FUNC_RET		= (1 << 3),
-	FTRACE_STOP_FUNC_RET		= (1 << 4),
-};
 struct ftrace_func_entry {
 	struct hlist_node hlist;
 	unsigned long ip;
@@ -1307,6 +1300,28 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
 		}				\
 	}
 
+/**
+ * ftrace_location - return true if the ip giving is a traced location
+ * @ip: the instruction pointer to check
+ *
+ * Returns 1 if @ip given is a pointer to a ftrace location.
+ * That is, the instruction that is either a NOP or call to
+ * the function tracer. It checks the ftrace internal tables to
+ * determine if the address belongs or not.
+ */
+int ftrace_location(unsigned long ip)
+{
+	struct ftrace_page *pg;
+	struct dyn_ftrace *rec;
+
+	do_for_each_ftrace_rec(pg, rec) {
+		if (rec->ip == ip)
+			return 1;
+	} while_for_each_ftrace_rec();
+
+	return 0;
+}
+
 static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 				     int filter_hash,
 				     bool inc)
@@ -1475,7 +1490,19 @@ static void print_ip_ins(const char *fmt, unsigned char *p)
 		printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
 }
 
-static void ftrace_bug(int failed, unsigned long ip)
+/**
+ * ftrace_bug - report and shutdown function tracer
+ * @failed: The failed type (EFAULT, EINVAL, EPERM)
+ * @ip: The address that failed
+ *
+ * The arch code that enables or disables the function tracing
+ * can call ftrace_bug() when it has detected a problem in
+ * modifying the code. @failed should be one of either:
+ * EFAULT - if the problem happens on reading the @ip address
+ * EINVAL - if what is read at @ip is not what was expected
+ * EPERM - if the problem happens on writting to the @ip address
+ */
+void ftrace_bug(int failed, unsigned long ip)
 {
 	switch (failed) {
 	case -EFAULT:
@@ -1517,15 +1544,10 @@ int ftrace_text_reserved(void *start, void *end)
 	return 0;
 }
 
-
-static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int update)
+static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
 {
-	unsigned long ftrace_addr;
 	unsigned long flag = 0UL;
 
-	ftrace_addr = (unsigned long)FTRACE_ADDR;
-
 	/*
 	 * If we are updating calls:
 	 *
@@ -1537,20 +1559,74 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int update)
 	 * If we are disabling calls, then disable all records that
 	 * are enabled.
 	 */
-	if (update && (rec->flags & ~FTRACE_FL_MASK))
+	if (enable && (rec->flags & ~FTRACE_FL_MASK))
 		flag = FTRACE_FL_ENABLED;
 
 	/* If the state of this record hasn't changed, then do nothing */
 	if ((rec->flags & FTRACE_FL_ENABLED) == flag)
-		return 0;
+		return FTRACE_UPDATE_IGNORE;
 
 	if (flag) {
-		rec->flags |= FTRACE_FL_ENABLED;
+		if (update)
+			rec->flags |= FTRACE_FL_ENABLED;
+		return FTRACE_UPDATE_MAKE_CALL;
+	}
+
+	if (update)
+		rec->flags &= ~FTRACE_FL_ENABLED;
+
+	return FTRACE_UPDATE_MAKE_NOP;
+}
+
+/**
+ * ftrace_update_record, set a record that now is tracing or not
+ * @rec: the record to update
+ * @enable: set to 1 if the record is tracing, zero to force disable
+ *
+ * The records that represent all functions that can be traced need
+ * to be updated when tracing has been enabled.
+ */
+int ftrace_update_record(struct dyn_ftrace *rec, int enable)
+{
+	return ftrace_check_record(rec, enable, 1);
+}
+
+/**
+ * ftrace_test_record, check if the record has been enabled or not
+ * @rec: the record to test
+ * @enable: set to 1 to check if enabled, 0 if it is disabled
+ *
+ * The arch code may need to test if a record is already set to
+ * tracing to determine how to modify the function code that it
+ * represents.
+ */
+int ftrace_test_record(struct dyn_ftrace *rec, int enable)
+{
+	return ftrace_check_record(rec, enable, 0);
+}
+
+static int
+__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+{
+	unsigned long ftrace_addr;
+	int ret;
+
+	ftrace_addr = (unsigned long)FTRACE_ADDR;
+
+	ret = ftrace_update_record(rec, enable);
+
+	switch (ret) {
+	case FTRACE_UPDATE_IGNORE:
+		return 0;
+
+	case FTRACE_UPDATE_MAKE_CALL:
 		return ftrace_make_call(rec, ftrace_addr);
+
+	case FTRACE_UPDATE_MAKE_NOP:
+		return ftrace_make_nop(NULL, rec, ftrace_addr);
 	}
 
-	rec->flags &= ~FTRACE_FL_ENABLED;
-	return ftrace_make_nop(NULL, rec, ftrace_addr);
+	return -1; /* unknow ftrace bug */
 }
 
 static void ftrace_replace_code(int update)
@@ -1576,6 +1652,78 @@ static void ftrace_replace_code(int update)
 	} while_for_each_ftrace_rec();
 }
 
+struct ftrace_rec_iter {
+	struct ftrace_page	*pg;
+	int			index;
+};
+
+/**
+ * ftrace_rec_iter_start, start up iterating over traced functions
+ *
+ * Returns an iterator handle that is used to iterate over all
+ * the records that represent address locations where functions
+ * are traced.
+ *
+ * May return NULL if no records are available.
+ */
+struct ftrace_rec_iter *ftrace_rec_iter_start(void)
+{
+	/*
+	 * We only use a single iterator.
+	 * Protected by the ftrace_lock mutex.
+	 */
+	static struct ftrace_rec_iter ftrace_rec_iter;
+	struct ftrace_rec_iter *iter = &ftrace_rec_iter;
+
+	iter->pg = ftrace_pages_start;
+	iter->index = 0;
+
+	/* Could have empty pages */
+	while (iter->pg && !iter->pg->index)
+		iter->pg = iter->pg->next;
+
+	if (!iter->pg)
+		return NULL;
+
+	return iter;
+}
+
+/**
+ * ftrace_rec_iter_next, get the next record to process.
+ * @iter: The handle to the iterator.
+ *
+ * Returns the next iterator after the given iterator @iter.
+ */
+struct ftrace_rec_iter *ftrace_rec_iter_next(struct ftrace_rec_iter *iter)
+{
+	iter->index++;
+
+	if (iter->index >= iter->pg->index) {
+		iter->pg = iter->pg->next;
+		iter->index = 0;
+
+		/* Could have empty pages */
+		while (iter->pg && !iter->pg->index)
+			iter->pg = iter->pg->next;
+	}
+
+	if (!iter->pg)
+		return NULL;
+
+	return iter;
+}
+
+/**
+ * ftrace_rec_iter_record, get the record at the iterator location
+ * @iter: The current iterator location
+ *
+ * Returns the record that the current @iter is at.
+ */
+struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter)
+{
+	return &iter->pg->records[iter->index];
+}
+
 static int
 ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
 {
@@ -1617,12 +1765,6 @@ static int __ftrace_modify_code(void *data)
 {
 	int *command = data;
 
-	/*
-	 * Do not call function tracer while we update the code.
-	 * We are in stop machine, no worrying about races.
-	 */
-	function_trace_stop++;
-
 	if (*command & FTRACE_UPDATE_CALLS)
 		ftrace_replace_code(1);
 	else if (*command & FTRACE_DISABLE_CALLS)
@@ -1636,21 +1778,33 @@ static int __ftrace_modify_code(void *data)
 	else if (*command & FTRACE_STOP_FUNC_RET)
 		ftrace_disable_ftrace_graph_caller();
 
-#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
-	/*
-	 * For archs that call ftrace_test_stop_func(), we must
-	 * wait till after we update all the function callers
-	 * before we update the callback. This keeps different
-	 * ops that record different functions from corrupting
-	 * each other.
-	 */
-	__ftrace_trace_function = __ftrace_trace_function_delay;
-#endif
-	function_trace_stop--;
-
 	return 0;
 }
 
+/**
+ * ftrace_run_stop_machine, go back to the stop machine method
+ * @command: The command to tell ftrace what to do
+ *
+ * If an arch needs to fall back to the stop machine method, the
+ * it can call this function.
+ */
+void ftrace_run_stop_machine(int command)
+{
+	stop_machine(__ftrace_modify_code, &command, NULL);
+}
+
+/**
+ * arch_ftrace_update_code, modify the code to trace or not trace
+ * @command: The command that needs to be done
+ *
+ * Archs can override this function if it does not need to
+ * run stop_machine() to modify code.
+ */
+void __weak arch_ftrace_update_code(int command)
+{
+	ftrace_run_stop_machine(command);
+}
+
 static void ftrace_run_update_code(int command)
 {
 	int ret;
@@ -1659,8 +1813,31 @@ static void ftrace_run_update_code(int command)
 	FTRACE_WARN_ON(ret);
 	if (ret)
 		return;
+	/*
+	 * Do not call function tracer while we update the code.
+	 * We are in stop machine.
+	 */
+	function_trace_stop++;
 
-	stop_machine(__ftrace_modify_code, &command, NULL);
+	/*
+	 * By default we use stop_machine() to modify the code.
+	 * But archs can do what ever they want as long as it
+	 * is safe. The stop_machine() is the safest, but also
+	 * produces the most overhead.
+	 */
+	arch_ftrace_update_code(command);
+
+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	/*
+	 * For archs that call ftrace_test_stop_func(), we must
+	 * wait till after we update all the function callers
+	 * before we update the callback. This keeps different
+	 * ops that record different functions from corrupting
+	 * each other.
+	 */
+	__ftrace_trace_function = __ftrace_trace_function_delay;
+#endif
+	function_trace_stop--;
 
 	ret = ftrace_arch_code_modify_post_process();
 	FTRACE_WARN_ON(ret);
-- 
cgit v1.2.3


From 3208230983a0ee3d95be22d463257e530c684956 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 16 Dec 2011 14:42:37 -0500
Subject: ftrace: Remove usage of "freed" records

Records that are added to the function trace table are
permanently there, except for modules. By separating out the
modules to their own pages that can be freed in one shot
we can remove the "freed" flag and simplify some of the record
management.

Another benefit of doing this is that we can also move the
records around, for example to sort them.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |   1 -
 kernel/trace/ftrace.c  | 100 ++++++++++++++++++++++++-------------------------
 2 files changed, 49 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 4f0b6fec379d..3f79bc458bff 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -163,7 +163,6 @@ extern int ftrace_text_reserved(void *start, void *end);
 
 enum {
 	FTRACE_FL_ENABLED	= (1 << 30),
-	FTRACE_FL_FREE		= (1 << 31),
 };
 
 #define FTRACE_FL_MASK		(0x3UL << 30)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 655b432fb890..be6888f40d2b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -996,8 +996,6 @@ struct ftrace_page {
 static struct ftrace_page	*ftrace_pages_start;
 static struct ftrace_page	*ftrace_pages;
 
-static struct dyn_ftrace *ftrace_free_records;
-
 static struct ftrace_func_entry *
 ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip)
 {
@@ -1421,32 +1419,8 @@ static void ftrace_hash_rec_enable(struct ftrace_ops *ops,
 	__ftrace_hash_rec_update(ops, filter_hash, 1);
 }
 
-static void ftrace_free_rec(struct dyn_ftrace *rec)
-{
-	rec->freelist = ftrace_free_records;
-	ftrace_free_records = rec;
-	rec->flags |= FTRACE_FL_FREE;
-}
-
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
-	struct dyn_ftrace *rec;
-
-	/* First check for freed records */
-	if (ftrace_free_records) {
-		rec = ftrace_free_records;
-
-		if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
-			FTRACE_WARN_ON_ONCE(1);
-			ftrace_free_records = NULL;
-			return NULL;
-		}
-
-		ftrace_free_records = rec->freelist;
-		memset(rec, 0, sizeof(*rec));
-		return rec;
-	}
-
 	if (ftrace_pages->index == ENTRIES_PER_PAGE) {
 		if (!ftrace_pages->next) {
 			/* allocate another page */
@@ -1639,10 +1613,6 @@ static void ftrace_replace_code(int update)
 		return;
 
 	do_for_each_ftrace_rec(pg, rec) {
-		/* Skip over free records */
-		if (rec->flags & FTRACE_FL_FREE)
-			continue;
-
 		failed = __ftrace_replace_code(rec, update);
 		if (failed) {
 			ftrace_bug(failed, rec->ip);
@@ -2007,11 +1977,8 @@ static int ftrace_update_code(struct module *mod)
 		 * Do the initial record conversion from mcount jump
 		 * to the NOP instructions.
 		 */
-		if (!ftrace_code_disable(mod, p)) {
-			ftrace_free_rec(p);
-			/* Game over */
+		if (!ftrace_code_disable(mod, p))
 			break;
-		}
 
 		ftrace_update_cnt++;
 
@@ -2026,10 +1993,8 @@ static int ftrace_update_code(struct module *mod)
 		 */
 		if (ftrace_start_up && ref) {
 			int failed = __ftrace_replace_code(p, 1);
-			if (failed) {
+			if (failed)
 				ftrace_bug(failed, p->ip);
-				ftrace_free_rec(p);
-			}
 		}
 	}
 
@@ -2223,9 +2188,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 		}
 	} else {
 		rec = &iter->pg->records[iter->idx++];
-		if ((rec->flags & FTRACE_FL_FREE) ||
-
-		    ((iter->flags & FTRACE_ITER_FILTER) &&
+		if (((iter->flags & FTRACE_ITER_FILTER) &&
 		     !(ftrace_lookup_ip(ops->filter_hash, rec->ip))) ||
 
 		    ((iter->flags & FTRACE_ITER_NOTRACE) &&
@@ -2602,7 +2565,6 @@ match_records(struct ftrace_hash *hash, char *buff,
 		goto out_unlock;
 
 	do_for_each_ftrace_rec(pg, rec) {
-
 		if (ftrace_match_record(rec, mod, search, search_len, type)) {
 			ret = enter_record(hash, rec, not);
 			if (ret < 0) {
@@ -3446,9 +3408,6 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 
 	do_for_each_ftrace_rec(pg, rec) {
 
-		if (rec->flags & FTRACE_FL_FREE)
-			continue;
-
 		if (ftrace_match_record(rec, NULL, search, search_len, type)) {
 			/* if it is in the array */
 			exists = false;
@@ -3566,6 +3525,27 @@ static int ftrace_process_locs(struct module *mod,
 	unsigned long flags = 0; /* Shut up gcc */
 
 	mutex_lock(&ftrace_lock);
+	/*
+	 * Core and each module needs their own pages, as
+	 * modules will free them when they are removed.
+	 * Force a new page to be allocated for modules.
+	 */
+	if (mod) {
+		if (!ftrace_pages)
+			return -ENOMEM;
+
+		/*
+		 * If the last page was full, it will be
+		 * allocated anyway.
+		 */
+		if (ftrace_pages->index != ENTRIES_PER_PAGE) {
+			ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
+			if (!ftrace_pages->next)
+				return -ENOMEM;
+			ftrace_pages = ftrace_pages->next;
+		}
+	}
+
 	p = start;
 	while (p < end) {
 		addr = ftrace_call_adjust(*p++);
@@ -3599,9 +3579,13 @@ static int ftrace_process_locs(struct module *mod,
 }
 
 #ifdef CONFIG_MODULES
+
+#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
+
 void ftrace_release_mod(struct module *mod)
 {
 	struct dyn_ftrace *rec;
+	struct ftrace_page **last_pg;
 	struct ftrace_page *pg;
 
 	mutex_lock(&ftrace_lock);
@@ -3609,16 +3593,30 @@ void ftrace_release_mod(struct module *mod)
 	if (ftrace_disabled)
 		goto out_unlock;
 
-	do_for_each_ftrace_rec(pg, rec) {
+	/*
+	 * Each module has its own ftrace_pages, remove
+	 * them from the list.
+	 */
+	last_pg = &ftrace_pages_start;
+	for (pg = ftrace_pages_start; pg; pg = *last_pg) {
+		rec = &pg->records[0];
 		if (within_module_core(rec->ip, mod)) {
 			/*
-			 * rec->ip is changed in ftrace_free_rec()
-			 * It should not between s and e if record was freed.
+			 * As core pages are first, the first
+			 * page should never be a module page.
 			 */
-			FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
-			ftrace_free_rec(rec);
-		}
-	} while_for_each_ftrace_rec();
+			if (WARN_ON(pg == ftrace_pages_start))
+				goto out_unlock;
+
+			/* Check if we are deleting the last page */
+			if (pg == ftrace_pages)
+				ftrace_pages = next_to_ftrace_page(last_pg);
+
+			*last_pg = pg->next;
+			free_page((unsigned long)pg);
+		} else
+			last_pg = &pg->next;
+	}
  out_unlock:
 	mutex_unlock(&ftrace_lock);
 }
-- 
cgit v1.2.3


From 85ae32ae019bc1c2cc22e5f51fe0c9f2812ef68c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 16 Dec 2011 16:30:31 -0500
Subject: ftrace: Replace record newlist with record page list

As new functions come in to be initialized from mcount to nop,
they are handled in groups of pages, whether they belong to the core
kernel or to a module. There's no need to keep track of these on a
per record basis.

At startup, and as any module is loaded, the functions to be
traced are stored in a group of pages and added to the function
list at the end. We just need to keep a pointer to the first
page of the list that was added, and use that to know where to
start on the list for initializing functions.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  5 +---
 kernel/trace/ftrace.c  | 68 +++++++++++++++++++++++++++-----------------------
 2 files changed, 38 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 3f79bc458bff..31b9fd7aedcd 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -173,10 +173,7 @@ struct dyn_ftrace {
 		unsigned long		ip; /* address of mcount call-site */
 		struct dyn_ftrace	*freelist;
 	};
-	union {
-		unsigned long		flags;
-		struct dyn_ftrace	*newlist;
-	};
+	unsigned long		flags;
 	struct dyn_arch_ftrace		arch;
 };
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2e7218869fe9..366d7881f188 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -977,8 +977,6 @@ static struct ftrace_ops global_ops = {
 	.filter_hash		= EMPTY_HASH,
 };
 
-static struct dyn_ftrace *ftrace_new_addrs;
-
 static DEFINE_MUTEX(ftrace_regex_lock);
 
 struct ftrace_page {
@@ -988,6 +986,8 @@ struct ftrace_page {
 	int			size;
 };
 
+static struct ftrace_page *ftrace_new_pgs;
+
 #define ENTRY_SIZE sizeof(struct dyn_ftrace)
 #define ENTRIES_PER_PAGE (PAGE_SIZE / ENTRY_SIZE)
 
@@ -1445,8 +1445,6 @@ ftrace_record_ip(unsigned long ip)
 		return NULL;
 
 	rec->ip = ip;
-	rec->newlist = ftrace_new_addrs;
-	ftrace_new_addrs = rec;
 
 	return rec;
 }
@@ -1936,9 +1934,11 @@ static int ops_traces_mod(struct ftrace_ops *ops)
 
 static int ftrace_update_code(struct module *mod)
 {
+	struct ftrace_page *pg;
 	struct dyn_ftrace *p;
 	cycle_t start, stop;
 	unsigned long ref = 0;
+	int i;
 
 	/*
 	 * When adding a module, we need to check if tracers are
@@ -1960,41 +1960,44 @@ static int ftrace_update_code(struct module *mod)
 	start = ftrace_now(raw_smp_processor_id());
 	ftrace_update_cnt = 0;
 
-	while (ftrace_new_addrs) {
+	for (pg = ftrace_new_pgs; pg; pg = pg->next) {
 
-		/* If something went wrong, bail without enabling anything */
-		if (unlikely(ftrace_disabled))
-			return -1;
+		for (i = 0; i < pg->index; i++) {
+			/* If something went wrong, bail without enabling anything */
+			if (unlikely(ftrace_disabled))
+				return -1;
 
-		p = ftrace_new_addrs;
-		ftrace_new_addrs = p->newlist;
-		p->flags = ref;
+			p = &pg->records[i];
+			p->flags = ref;
 
-		/*
-		 * Do the initial record conversion from mcount jump
-		 * to the NOP instructions.
-		 */
-		if (!ftrace_code_disable(mod, p))
-			break;
+			/*
+			 * Do the initial record conversion from mcount jump
+			 * to the NOP instructions.
+			 */
+			if (!ftrace_code_disable(mod, p))
+				break;
 
-		ftrace_update_cnt++;
+			ftrace_update_cnt++;
 
-		/*
-		 * If the tracing is enabled, go ahead and enable the record.
-		 *
-		 * The reason not to enable the record immediatelly is the
-		 * inherent check of ftrace_make_nop/ftrace_make_call for
-		 * correct previous instructions.  Making first the NOP
-		 * conversion puts the module to the correct state, thus
-		 * passing the ftrace_make_call check.
-		 */
-		if (ftrace_start_up && ref) {
-			int failed = __ftrace_replace_code(p, 1);
-			if (failed)
-				ftrace_bug(failed, p->ip);
+			/*
+			 * If the tracing is enabled, go ahead and enable the record.
+			 *
+			 * The reason not to enable the record immediatelly is the
+			 * inherent check of ftrace_make_nop/ftrace_make_call for
+			 * correct previous instructions.  Making first the NOP
+			 * conversion puts the module to the correct state, thus
+			 * passing the ftrace_make_call check.
+			 */
+			if (ftrace_start_up && ref) {
+				int failed = __ftrace_replace_code(p, 1);
+				if (failed)
+					ftrace_bug(failed, p->ip);
+			}
 		}
 	}
 
+	ftrace_new_pgs = NULL;
+
 	stop = ftrace_now(raw_smp_processor_id());
 	ftrace_update_time = stop - start;
 	ftrace_update_tot_cnt += ftrace_update_cnt;
@@ -3632,6 +3635,9 @@ static int ftrace_process_locs(struct module *mod,
 			break;
 	}
 
+	/* These new locations need to be initialized */
+	ftrace_new_pgs = pg;
+
 	/*
 	 * We only need to disable interrupts on start up
 	 * because we are modifying code that an interrupt
-- 
cgit v1.2.3


From fc13cb0ce45296f331263a6034aa1814203e1ac3 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 19 Dec 2011 14:41:25 -0500
Subject: ftrace: Allow other users of function tracing to use the output
 listing

The function tracer is set up to allow any other subsystem (like perf)
to use it. Ftrace already has a way to list what functions are enabled
by the global_ops. It would be very helpful to let other users of
the function tracer use the same code.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 35 +++++++++++++++++++++++++++++++++++
 kernel/trace/ftrace.c  | 41 +++++++++++++++++++++++++----------------
 2 files changed, 60 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 31b9fd7aedcd..aa7559f0a224 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -202,6 +202,14 @@ enum {
 	FTRACE_UPDATE_MAKE_NOP,
 };
 
+enum {
+	FTRACE_ITER_FILTER	= (1 << 0),
+	FTRACE_ITER_NOTRACE	= (1 << 1),
+	FTRACE_ITER_PRINTALL	= (1 << 2),
+	FTRACE_ITER_HASH	= (1 << 3),
+	FTRACE_ITER_ENABLED	= (1 << 4),
+};
+
 void arch_ftrace_update_code(int command);
 
 struct ftrace_rec_iter;
@@ -217,6 +225,15 @@ int ftrace_location(unsigned long ip);
 
 extern ftrace_func_t ftrace_trace_function;
 
+int ftrace_regex_open(struct ftrace_ops *ops, int flag,
+		  struct inode *inode, struct file *file);
+ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
+			    size_t cnt, loff_t *ppos);
+ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
+			     size_t cnt, loff_t *ppos);
+loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin);
+int ftrace_regex_release(struct inode *inode, struct file *file);
+
 /* defined in arch */
 extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_dyn_arch_init(void *data);
@@ -311,6 +328,24 @@ static inline int ftrace_text_reserved(void *start, void *end)
 {
 	return 0;
 }
+
+/*
+ * Again users of functions that have ftrace_ops may not
+ * have them defined when ftrace is not enabled, but these
+ * functions may still be called. Use a macro instead of inline.
+ */
+#define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
+
+static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
+			    size_t cnt, loff_t *ppos) { return -ENODEV; }
+static inline ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
+			     size_t cnt, loff_t *ppos) { return -ENODEV; }
+static inline loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
+{
+	return -ENODEV;
+}
+static inline int
+ftrace_regex_release(struct inode *inode, struct file *file) { return -ENODEV; }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /* totally disable ftrace - can not re-enable after this */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e1ee07f81ca2..5b105c5ddc0c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2134,14 +2134,6 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
 	return 0;
 }
 
-enum {
-	FTRACE_ITER_FILTER	= (1 << 0),
-	FTRACE_ITER_NOTRACE	= (1 << 1),
-	FTRACE_ITER_PRINTALL	= (1 << 2),
-	FTRACE_ITER_HASH	= (1 << 3),
-	FTRACE_ITER_ENABLED	= (1 << 4),
-};
-
 #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
 
 struct ftrace_iterator {
@@ -2249,7 +2241,7 @@ static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
-	struct ftrace_ops *ops = &global_ops;
+	struct ftrace_ops *ops = iter->ops;
 	struct dyn_ftrace *rec = NULL;
 
 	if (unlikely(ftrace_disabled))
@@ -2305,7 +2297,7 @@ static void reset_iter_read(struct ftrace_iterator *iter)
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
-	struct ftrace_ops *ops = &global_ops;
+	struct ftrace_ops *ops = iter->ops;
 	void *p = NULL;
 	loff_t l;
 
@@ -2414,6 +2406,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
 		return -ENOMEM;
 
 	iter->pg = ftrace_pages_start;
+	iter->ops = &global_ops;
 
 	ret = seq_open(file, &show_ftrace_seq_ops);
 	if (!ret) {
@@ -2442,6 +2435,7 @@ ftrace_enabled_open(struct inode *inode, struct file *file)
 
 	iter->pg = ftrace_pages_start;
 	iter->flags = FTRACE_ITER_ENABLED;
+	iter->ops = &global_ops;
 
 	ret = seq_open(file, &show_ftrace_seq_ops);
 	if (!ret) {
@@ -2462,7 +2456,23 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
 	mutex_unlock(&ftrace_lock);
 }
 
-static int
+/**
+ * ftrace_regex_open - initialize function tracer filter files
+ * @ops: The ftrace_ops that hold the hash filters
+ * @flag: The type of filter to process
+ * @inode: The inode, usually passed in to your open routine
+ * @file: The file, usually passed in to your open routine
+ *
+ * ftrace_regex_open() initializes the filter files for the
+ * @ops. Depending on @flag it may process the filter hash or
+ * the notrace hash of @ops. With this called from the open
+ * routine, you can use ftrace_filter_write() for the write
+ * routine if @flag has FTRACE_ITER_FILTER set, or
+ * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
+ * ftrace_regex_lseek() should be used as the lseek routine, and
+ * release must call ftrace_regex_release().
+ */
+int
 ftrace_regex_open(struct ftrace_ops *ops, int flag,
 		  struct inode *inode, struct file *file)
 {
@@ -2542,7 +2552,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
 				 inode, file);
 }
 
-static loff_t
+loff_t
 ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
 {
 	loff_t ret;
@@ -3095,14 +3105,14 @@ out_unlock:
 	return ret;
 }
 
-static ssize_t
+ssize_t
 ftrace_filter_write(struct file *file, const char __user *ubuf,
 		    size_t cnt, loff_t *ppos)
 {
 	return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
 }
 
-static ssize_t
+ssize_t
 ftrace_notrace_write(struct file *file, const char __user *ubuf,
 		     size_t cnt, loff_t *ppos)
 {
@@ -3292,8 +3302,7 @@ static void __init set_ftrace_early_filters(void)
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 }
 
-static int
-ftrace_regex_release(struct inode *inode, struct file *file)
+int ftrace_regex_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = (struct seq_file *)file->private_data;
 	struct ftrace_iterator *iter;
-- 
cgit v1.2.3


From 69a3083c4a7df0322d97bb2b43a33cb12af8131a Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 19 Dec 2011 15:21:16 -0500
Subject: ftrace: Decouple hash items from showing filtered functions

The set_ftrace_filter shows "hashed" functions, which are functions
that are added with operations to them (like traceon and traceoff).

As other subsystems may be able to show what functions they are
using for function tracing, the hash items should no longer
be shown just because the FILTER flag is set, as they have nothing
to do with other subsystems' filters.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  5 +++--
 kernel/trace/ftrace.c  | 16 ++++++++--------
 2 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index aa7559f0a224..d1ff0de18970 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -206,8 +206,9 @@ enum {
 	FTRACE_ITER_FILTER	= (1 << 0),
 	FTRACE_ITER_NOTRACE	= (1 << 1),
 	FTRACE_ITER_PRINTALL	= (1 << 2),
-	FTRACE_ITER_HASH	= (1 << 3),
-	FTRACE_ITER_ENABLED	= (1 << 4),
+	FTRACE_ITER_DO_HASH	= (1 << 3),
+	FTRACE_ITER_HASH	= (1 << 4),
+	FTRACE_ITER_ENABLED	= (1 << 5),
 };
 
 void arch_ftrace_update_code(int command);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5b105c5ddc0c..5728d9aa632e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2198,6 +2198,9 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
 	void *p = NULL;
 	loff_t l;
 
+	if (!(iter->flags & FTRACE_ITER_DO_HASH))
+		return NULL;
+
 	if (iter->func_pos > *pos)
 		return NULL;
 
@@ -2343,12 +2346,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 			break;
 	}
 
-	if (!p) {
-		if (iter->flags & FTRACE_ITER_FILTER)
-			return t_hash_start(m, pos);
-
-		return NULL;
-	}
+	if (!p)
+		return t_hash_start(m, pos);
 
 	return iter;
 }
@@ -2541,8 +2540,9 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
 static int
 ftrace_filter_open(struct inode *inode, struct file *file)
 {
-	return ftrace_regex_open(&global_ops, FTRACE_ITER_FILTER,
-				 inode, file);
+	return ftrace_regex_open(&global_ops,
+			FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH,
+			inode, file);
 }
 
 static int
-- 
cgit v1.2.3


From 2a85a37f168d2b4d74d493b578af4dc9032be92e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 19 Dec 2011 21:57:44 -0500
Subject: ftrace: Allow access to the boot time function enabling

Change set_ftrace_early_filter() to ftrace_set_early_filter()
and make it a global function. This will allow other subsystems
in the kernel to be able to enable function tracing at start
up and reuse the ftrace function parsing code.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 3 +++
 kernel/trace/ftrace.c  | 8 ++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index d1ff0de18970..41df6f501656 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -235,6 +235,9 @@ ssize_t ftrace_notrace_write(struct file *file, const char __user *ubuf,
 loff_t ftrace_regex_lseek(struct file *file, loff_t offset, int origin);
 int ftrace_regex_release(struct inode *inode, struct file *file);
 
+void __init
+ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
+
 /* defined in arch */
 extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_dyn_arch_init(void *data);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5728d9aa632e..683d559a0eef 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3279,8 +3279,8 @@ static void __init set_ftrace_early_graph(char *buf)
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
-static void __init
-set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
+void __init
+ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable)
 {
 	char *func;
 
@@ -3293,9 +3293,9 @@ set_ftrace_early_filter(struct ftrace_ops *ops, char *buf, int enable)
 static void __init set_ftrace_early_filters(void)
 {
 	if (ftrace_filter_buf[0])
-		set_ftrace_early_filter(&global_ops, ftrace_filter_buf, 1);
+		ftrace_set_early_filter(&global_ops, ftrace_filter_buf, 1);
 	if (ftrace_notrace_buf[0])
-		set_ftrace_early_filter(&global_ops, ftrace_notrace_buf, 0);
+		ftrace_set_early_filter(&global_ops, ftrace_notrace_buf, 0);
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	if (ftrace_graph_buf[0])
 		set_ftrace_early_graph(ftrace_graph_buf);
-- 
cgit v1.2.3


From 62268ce9170c5466332c046ff6ddafcb67751502 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 13 Dec 2011 23:48:03 +0800
Subject: dmaengine: add DMA_TRANS_NONE to dma_transfer_direction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before dma_transfer_direction was introduced to replace
dma_data_direction, some dmaengine devices used DMA_NONE of
dma_data_direction to communicate with their client drivers.
The mxs-dma driver and its clients mxs-mmc and gpmi-nand are such a case.

This patch adds DMA_TRANS_NONE to dma_transfer_direction and
migrates the DMA_NONE use in mxs-dma to it.

It also fixes the compile warning below.

CC      drivers/dma/mxs-dma.o
drivers/dma/mxs-dma.c: In function ‘mxs_dma_prep_slave_sg’:
drivers/dma/mxs-dma.c:420:16: warning: comparison between ‘enum dma_transfer_direction’ and ‘enum dma_data_direction’

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/mxs-dma.c     | 2 +-
 include/linux/dmaengine.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index 6548595c26dc..493af2f6e33a 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -391,7 +391,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
 		idx = 0;
 	}
 
-	if (direction == DMA_NONE) {
+	if (direction == DMA_TRANS_NONE) {
 		ccw = &mxs_chan->ccw[idx++];
 		pio = (u32 *) sgl;
 
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 5532bb8b500c..679b349d9b66 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -88,6 +88,7 @@ enum dma_transfer_direction {
 	DMA_MEM_TO_DEV,
 	DMA_DEV_TO_MEM,
 	DMA_DEV_TO_DEV,
+	DMA_TRANS_NONE,
 };
 
 /**
-- 
cgit v1.2.3


From 4e82786f7039cb707c031dcf0dc84c025e149487 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Tue, 13 Dec 2011 23:48:05 +0800
Subject: mtd: fix compile error for gpmi-nand

The driver gpmi-nand should compile at least.  This patch adds the
missing gpmi-nand.h to fix the compile error below.

  CC      drivers/mtd/nand/gpmi-nand/gpmi-nand.o
  CC      drivers/mtd/nand/gpmi-nand/gpmi-lib.o
drivers/mtd/nand/gpmi-nand/gpmi-nand.c:25:33: fatal error: linux/mtd/gpmi-nand.h: No such file or directory
drivers/mtd/nand/gpmi-nand/gpmi-lib.c:21:33: fatal error: linux/mtd/gpmi-nand.h: No such file or directory

This header is taken from the patch below, whose merging has been
postponed.

  [PATCH v8 1/4] ARM: mxs: add GPMI-NAND support for imx23/imx28
  http://permalink.gmane.org/gmane.linux.drivers.mtd/37338

Signed-off-by: Huang Shijie <b32955@freescale.com>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 include/linux/mtd/gpmi-nand.h | 68 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 include/linux/mtd/gpmi-nand.h

(limited to 'include/linux')

diff --git a/include/linux/mtd/gpmi-nand.h b/include/linux/mtd/gpmi-nand.h
new file mode 100644
index 000000000000..69b6dbf46b5e
--- /dev/null
+++ b/include/linux/mtd/gpmi-nand.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef __MACH_MXS_GPMI_NAND_H__
+#define __MACH_MXS_GPMI_NAND_H__
+
+/* The size of the resources is fixed. */
+#define GPMI_NAND_RES_SIZE	6
+
+/* Resource names for the GPMI NAND driver. */
+#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME  "GPMI NAND GPMI Registers"
+#define GPMI_NAND_GPMI_INTERRUPT_RES_NAME  "GPMI NAND GPMI Interrupt"
+#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME   "GPMI NAND BCH Registers"
+#define GPMI_NAND_BCH_INTERRUPT_RES_NAME   "GPMI NAND BCH Interrupt"
+#define GPMI_NAND_DMA_CHANNELS_RES_NAME    "GPMI NAND DMA Channels"
+#define GPMI_NAND_DMA_INTERRUPT_RES_NAME   "GPMI NAND DMA Interrupt"
+
+/**
+ * struct gpmi_nand_platform_data - GPMI NAND driver platform data.
+ *
+ * This structure communicates platform-specific information to the GPMI NAND
+ * driver that can't be expressed as resources.
+ *
+ * @platform_init:           A pointer to a function the driver will call to
+ *                           initialize the platform (e.g., set up the pin mux).
+ * @min_prop_delay_in_ns:    Minimum propagation delay of GPMI signals to and
+ *                           from the NAND Flash device, in nanoseconds.
+ * @max_prop_delay_in_ns:    Maximum propagation delay of GPMI signals to and
+ *                           from the NAND Flash device, in nanoseconds.
+ * @max_chip_count:          The maximum number of chips for which the driver
+ *                           should configure the hardware. This value most
+ *                           likely reflects the number of pins that are
+ *                           connected to a NAND Flash device. If this is
+ *                           greater than the SoC hardware can support, the
+ *                           driver will print a message and fail to initialize.
+ * @partitions:              An optional pointer to an array of partition
+ *                           descriptions.
+ * @partition_count:         The number of elements in the partitions array.
+ */
+struct gpmi_nand_platform_data {
+	/* SoC hardware information. */
+	int		(*platform_init)(void);
+
+	/* NAND Flash information. */
+	unsigned int	min_prop_delay_in_ns;
+	unsigned int	max_prop_delay_in_ns;
+	unsigned int	max_chip_count;
+
+	/* Medium information. */
+	struct		mtd_partition *partitions;
+	unsigned	partition_count;
+};
+#endif
-- 
cgit v1.2.3


From 5245db49d44e6033fece4d9f5946f8970c0d9ca1 Mon Sep 17 00:00:00 2001
From: Heiko Stübner <heiko@sntech.de>
Date: Tue, 27 Dec 2011 21:21:17 -0800
Subject: Input: add driver for AUO In-Cell touchscreens using pixcir ICs

Some displays from AUO have a so-called in-cell touchscreen, meaning it
is built directly into the display unit.

Touchdata is gathered through PIXCIR Tango-ICs and processed in an
Atmel ATmega168P with custom firmware. Communication between the host
system and ATmega is done via I2C.

Devices using this touch solution include the Dell Streak5 and the family
of Qisda ebook readers.

The driver reports single- and multi-touch events including touch area
values.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/Kconfig         |  13 +
 drivers/input/touchscreen/Makefile        |   1 +
 drivers/input/touchscreen/auo-pixcir-ts.c | 652 ++++++++++++++++++++++++++++++
 include/linux/input/auo-pixcir-ts.h       |  56 +++
 4 files changed, 722 insertions(+)
 create mode 100644 drivers/input/touchscreen/auo-pixcir-ts.c
 create mode 100644 include/linux/input/auo-pixcir-ts.h

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 2b456a915d77..a121e36e5a47 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -98,6 +98,19 @@ config TOUCHSCREEN_ATMEL_MXT
 	  To compile this driver as a module, choose M here: the
 	  module will be called atmel_mxt_ts.
 
+config TOUCHSCREEN_AUO_PIXCIR
+	tristate "AUO in-cell touchscreen using Pixcir ICs"
+	depends on I2C
+	depends on GPIOLIB
+	help
+	  Say Y here if you have an AUO display with in-cell touchscreen
+	  using Pixcir ICs.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called auo-pixcir-ts.
+
 config TOUCHSCREEN_BITSY
 	tristate "Compaq iPAQ H3600 (Bitsy) touchscreen"
 	depends on SA1100_BITSY
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index a09c546b33b7..f0b4d16d39a6 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_TOUCHSCREEN_AD7879_SPI)	+= ad7879-spi.o
 obj-$(CONFIG_TOUCHSCREEN_ADS7846)	+= ads7846.o
 obj-$(CONFIG_TOUCHSCREEN_ATMEL_MXT)	+= atmel_mxt_ts.o
 obj-$(CONFIG_TOUCHSCREEN_ATMEL_TSADCC)	+= atmel_tsadcc.o
+obj-$(CONFIG_TOUCHSCREEN_AUO_PIXCIR)	+= auo-pixcir-ts.o
 obj-$(CONFIG_TOUCHSCREEN_BITSY)		+= h3600_ts_input.o
 obj-$(CONFIG_TOUCHSCREEN_BU21013)       += bu21013_ts.o
 obj-$(CONFIG_TOUCHSCREEN_CY8CTMG110)	+= cy8ctmg110_ts.o
diff --git a/drivers/input/touchscreen/auo-pixcir-ts.c b/drivers/input/touchscreen/auo-pixcir-ts.c
new file mode 100644
index 000000000000..94fb9fbb08a9
--- /dev/null
+++ b/drivers/input/touchscreen/auo-pixcir-ts.c
@@ -0,0 +1,652 @@
+/*
+ * Driver for AUO in-cell touchscreens
+ *
+ * Copyright (c) 2011 Heiko Stuebner <heiko@sntech.de>
+ *
+ * loosely based on auo_touch.c from Dell Streak vendor-kernel
+ *
+ * Copyright (c) 2008 QUALCOMM Incorporated.
+ * Copyright (c) 2008 QUALCOMM USA, INC.
+ *
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/input.h>
+#include <linux/jiffies.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/input/auo-pixcir-ts.h>
+
+/*
+ * Coordinate calculation:
+ * X1 = X1_LSB + X1_MSB*256
+ * Y1 = Y1_LSB + Y1_MSB*256
+ * X2 = X2_LSB + X2_MSB*256
+ * Y2 = Y2_LSB + Y2_MSB*256
+ */
+#define AUO_PIXCIR_REG_X1_LSB		0x00
+#define AUO_PIXCIR_REG_X1_MSB		0x01
+#define AUO_PIXCIR_REG_Y1_LSB		0x02
+#define AUO_PIXCIR_REG_Y1_MSB		0x03
+#define AUO_PIXCIR_REG_X2_LSB		0x04
+#define AUO_PIXCIR_REG_X2_MSB		0x05
+#define AUO_PIXCIR_REG_Y2_LSB		0x06
+#define AUO_PIXCIR_REG_Y2_MSB		0x07
+
+#define AUO_PIXCIR_REG_STRENGTH		0x0d
+#define AUO_PIXCIR_REG_STRENGTH_X1_LSB	0x0e
+#define AUO_PIXCIR_REG_STRENGTH_X1_MSB	0x0f
+
+#define AUO_PIXCIR_REG_RAW_DATA_X	0x2b
+#define AUO_PIXCIR_REG_RAW_DATA_Y	0x4f
+
+#define AUO_PIXCIR_REG_X_SENSITIVITY	0x6f
+#define AUO_PIXCIR_REG_Y_SENSITIVITY	0x70
+#define AUO_PIXCIR_REG_INT_SETTING	0x71
+#define AUO_PIXCIR_REG_INT_WIDTH	0x72
+#define AUO_PIXCIR_REG_POWER_MODE	0x73
+
+#define AUO_PIXCIR_REG_VERSION		0x77
+#define AUO_PIXCIR_REG_CALIBRATE	0x78
+
+#define AUO_PIXCIR_REG_TOUCHAREA_X1	0x1e
+#define AUO_PIXCIR_REG_TOUCHAREA_Y1	0x1f
+#define AUO_PIXCIR_REG_TOUCHAREA_X2	0x20
+#define AUO_PIXCIR_REG_TOUCHAREA_Y2	0x21
+
+#define AUO_PIXCIR_REG_EEPROM_CALIB_X	0x42
+#define AUO_PIXCIR_REG_EEPROM_CALIB_Y	0xad
+
+#define AUO_PIXCIR_INT_TPNUM_MASK	0xe0
+#define AUO_PIXCIR_INT_TPNUM_SHIFT	5
+#define AUO_PIXCIR_INT_RELEASE		(1 << 4)
+#define AUO_PIXCIR_INT_ENABLE		(1 << 3)
+#define AUO_PIXCIR_INT_POL_HIGH		(1 << 2)
+#define AUO_PIXCIR_INT_MODE_MASK	0x03
+
+/*
+ * Power modes:
+ * active:	scan speed 60Hz
+ * sleep:	scan speed 10Hz can be auto-activated, wakeup on 1st touch
+ * deep sleep:	scan speed 1Hz can only be entered or left manually.
+ */
+#define AUO_PIXCIR_POWER_ACTIVE		0x00
+#define AUO_PIXCIR_POWER_SLEEP		0x01
+#define AUO_PIXCIR_POWER_DEEP_SLEEP	0x02
+#define AUO_PIXCIR_POWER_MASK		0x03
+
+#define AUO_PIXCIR_POWER_ALLOW_SLEEP	(1 << 2)
+#define AUO_PIXCIR_POWER_IDLE_TIME(ms)	((ms & 0xf) << 4)
+
+#define AUO_PIXCIR_CALIBRATE		0x03
+
+#define AUO_PIXCIR_EEPROM_CALIB_X_LEN	62
+#define AUO_PIXCIR_EEPROM_CALIB_Y_LEN	36
+
+#define AUO_PIXCIR_RAW_DATA_X_LEN	18
+#define AUO_PIXCIR_RAW_DATA_Y_LEN	11
+
+#define AUO_PIXCIR_STRENGTH_ENABLE	(1 << 0)
+
+/* Touchscreen absolute values */
+#define AUO_PIXCIR_REPORT_POINTS	2
+#define AUO_PIXCIR_MAX_AREA		0xff
+#define AUO_PIXCIR_PENUP_TIMEOUT_MS	10
+
+struct auo_pixcir_ts {
+	struct i2c_client	*client;
+	struct input_dev	*input;
+	char			phys[32];
+
+	/* special handling for touch_indicate interrupt mode */
+	bool			touch_ind_mode;
+
+	wait_queue_head_t	wait;
+	bool			stopped;
+};
+
+struct auo_point_t {
+	int	coord_x;
+	int	coord_y;
+	int	area_major;
+	int	area_minor;
+	int	orientation;
+};
+
+static int auo_pixcir_collect_data(struct auo_pixcir_ts *ts,
+				   struct auo_point_t *point)
+{
+	struct i2c_client *client = ts->client;
+	const struct auo_pixcir_ts_platdata *pdata = client->dev.platform_data;
+	uint8_t raw_coord[8];
+	uint8_t raw_area[4];
+	int i, ret;
+
+	/* touch coordinates */
+	ret = i2c_smbus_read_i2c_block_data(client, AUO_PIXCIR_REG_X1_LSB,
+					    8, raw_coord);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed to read coordinate, %d\n", ret);
+		return ret;
+	}
+
+	/* touch area */
+	ret = i2c_smbus_read_i2c_block_data(client, AUO_PIXCIR_REG_TOUCHAREA_X1,
+					    4, raw_area);
+	if (ret < 0) {
+		dev_err(&client->dev, "could not read touch area, %d\n", ret);
+		return ret;
+	}
+
+	for (i = 0; i < AUO_PIXCIR_REPORT_POINTS; i++) {
+		point[i].coord_x =
+			raw_coord[4 * i + 1] << 8 | raw_coord[4 * i];
+		point[i].coord_y =
+			raw_coord[4 * i + 3] << 8 | raw_coord[4 * i + 2];
+
+		if (point[i].coord_x > pdata->x_max ||
+		    point[i].coord_y > pdata->y_max) {
+			dev_warn(&client->dev, "coordinates (%d,%d) invalid\n",
+				point[i].coord_x, point[i].coord_y);
+			point[i].coord_x = point[i].coord_y = 0;
+		}
+
+		/* determine touch major, minor and orientation */
+		point[i].area_major = max(raw_area[2 * i], raw_area[2 * i + 1]);
+		point[i].area_minor = min(raw_area[2 * i], raw_area[2 * i + 1]);
+		point[i].orientation = raw_area[2 * i] > raw_area[2 * i + 1];
+	}
+
+	return 0;
+}
+
+static irqreturn_t auo_pixcir_interrupt(int irq, void *dev_id)
+{
+	struct auo_pixcir_ts *ts = dev_id;
+	struct i2c_client *client = ts->client;
+	const struct auo_pixcir_ts_platdata *pdata = client->dev.platform_data;
+	struct auo_point_t point[AUO_PIXCIR_REPORT_POINTS];
+	int i;
+	int ret;
+	int fingers = 0;
+	int abs = -1;
+
+	while (!ts->stopped) {
+
+		/* check for up event in touch_ind_mode */
+		if (ts->touch_ind_mode) {
+			if (gpio_get_value(pdata->gpio_int) == 0) {
+				input_mt_sync(ts->input);
+				input_report_key(ts->input, BTN_TOUCH, 0);
+				input_sync(ts->input);
+				break;
+			}
+		}
+
+		ret = auo_pixcir_collect_data(ts, point);
+		if (ret < 0) {
+			/* we want to loop only in touch_ind_mode */
+			if (!ts->touch_ind_mode)
+				break;
+
+			wait_event_timeout(ts->wait, ts->stopped,
+				msecs_to_jiffies(AUO_PIXCIR_PENUP_TIMEOUT_MS));
+			continue;
+		}
+
+		for (i = 0; i < AUO_PIXCIR_REPORT_POINTS; i++) {
+			if (point[i].coord_x > 0 || point[i].coord_y > 0) {
+				input_report_abs(ts->input, ABS_MT_POSITION_X,
+						 point[i].coord_x);
+				input_report_abs(ts->input, ABS_MT_POSITION_Y,
+						 point[i].coord_y);
+				input_report_abs(ts->input, ABS_MT_TOUCH_MAJOR,
+						 point[i].area_major);
+				input_report_abs(ts->input, ABS_MT_TOUCH_MINOR,
+						 point[i].area_minor);
+				input_report_abs(ts->input, ABS_MT_ORIENTATION,
+						 point[i].orientation);
+				input_mt_sync(ts->input);
+
+				/* use first finger as source for singletouch */
+				if (fingers == 0)
+					abs = i;
+
+				/* number of touch points could also be queried
+				 * via i2c but would require an additional call
+				 */
+				fingers++;
+			}
+		}
+
+		input_report_key(ts->input, BTN_TOUCH, fingers > 0);
+
+		if (abs > -1) {
+			input_report_abs(ts->input, ABS_X, point[abs].coord_x);
+			input_report_abs(ts->input, ABS_Y, point[abs].coord_y);
+		}
+
+		input_sync(ts->input);
+
+		/* we want to loop only in touch_ind_mode */
+		if (!ts->touch_ind_mode)
+			break;
+
+		wait_event_timeout(ts->wait, ts->stopped,
+				 msecs_to_jiffies(AUO_PIXCIR_PENUP_TIMEOUT_MS));
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Set the power mode of the device.
+ * Valid modes are
+ * - AUO_PIXCIR_POWER_ACTIVE
+ * - AUO_PIXCIR_POWER_SLEEP - automatically left on first touch
+ * - AUO_PIXCIR_POWER_DEEP_SLEEP
+ */
+static int auo_pixcir_power_mode(struct auo_pixcir_ts *ts, int mode)
+{
+	struct i2c_client *client = ts->client;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(client, AUO_PIXCIR_REG_POWER_MODE);
+	if (ret < 0) {
+		dev_err(&client->dev, "unable to read reg %Xh, %d\n",
+			AUO_PIXCIR_REG_POWER_MODE, ret);
+		return ret;
+	}
+
+	ret &= ~AUO_PIXCIR_POWER_MASK;
+	ret |= mode;
+
+	ret = i2c_smbus_write_byte_data(client, AUO_PIXCIR_REG_POWER_MODE, ret);
+	if (ret) {
+		dev_err(&client->dev, "unable to write reg %Xh, %d\n",
+			AUO_PIXCIR_REG_POWER_MODE, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static __devinit int auo_pixcir_int_config(struct auo_pixcir_ts *ts,
+					   int int_setting)
+{
+	struct i2c_client *client = ts->client;
+	struct auo_pixcir_ts_platdata *pdata = client->dev.platform_data;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(client, AUO_PIXCIR_REG_INT_SETTING);
+	if (ret < 0) {
+		dev_err(&client->dev, "unable to read reg %Xh, %d\n",
+			AUO_PIXCIR_REG_INT_SETTING, ret);
+		return ret;
+	}
+
+	ret &= ~AUO_PIXCIR_INT_MODE_MASK;
+	ret |= int_setting;
+	ret |= AUO_PIXCIR_INT_POL_HIGH; /* always use high for interrupts */
+
+	ret = i2c_smbus_write_byte_data(client, AUO_PIXCIR_REG_INT_SETTING,
+					ret);
+	if (ret < 0) {
+		dev_err(&client->dev, "unable to write reg %Xh, %d\n",
+			AUO_PIXCIR_REG_INT_SETTING, ret);
+		return ret;
+	}
+
+	ts->touch_ind_mode = pdata->int_setting == AUO_PIXCIR_INT_TOUCH_IND;
+
+	return 0;
+}
+
+/* control the generation of interrupts on the device side */
+static int auo_pixcir_int_toggle(struct auo_pixcir_ts *ts, bool enable)
+{
+	struct i2c_client *client = ts->client;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(client, AUO_PIXCIR_REG_INT_SETTING);
+	if (ret < 0) {
+		dev_err(&client->dev, "unable to read reg %Xh, %d\n",
+			AUO_PIXCIR_REG_INT_SETTING, ret);
+		return ret;
+	}
+
+	if (enable)
+		ret |= AUO_PIXCIR_INT_ENABLE;
+	else
+		ret &= ~AUO_PIXCIR_INT_ENABLE;
+
+	ret = i2c_smbus_write_byte_data(client, AUO_PIXCIR_REG_INT_SETTING,
+					ret);
+	if (ret < 0) {
+		dev_err(&client->dev, "unable to write reg %Xh, %d\n",
+			AUO_PIXCIR_REG_INT_SETTING, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int auo_pixcir_start(struct auo_pixcir_ts *ts)
+{
+	struct i2c_client *client = ts->client;
+	int ret;
+
+	ret = auo_pixcir_power_mode(ts, AUO_PIXCIR_POWER_ACTIVE);
+	if (ret < 0) {
+		dev_err(&client->dev, "could not set power mode, %d\n",
+			ret);
+		return ret;
+	}
+
+	ts->stopped = false;
+	mb();
+	enable_irq(client->irq);
+
+	ret = auo_pixcir_int_toggle(ts, 1);
+	if (ret < 0) {
+		dev_err(&client->dev, "could not enable interrupt, %d\n",
+			ret);
+		disable_irq(client->irq);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int auo_pixcir_stop(struct auo_pixcir_ts *ts)
+{
+	struct i2c_client *client = ts->client;
+	int ret;
+
+	ret = auo_pixcir_int_toggle(ts, 0);
+	if (ret < 0) {
+		dev_err(&client->dev, "could not disable interrupt, %d\n",
+			ret);
+		return ret;
+	}
+
+	/* disable receiving of interrupts */
+	disable_irq(client->irq);
+	ts->stopped = true;
+	mb();
+	wake_up(&ts->wait);
+
+	return auo_pixcir_power_mode(ts, AUO_PIXCIR_POWER_DEEP_SLEEP);
+}
+
+static int auo_pixcir_input_open(struct input_dev *dev)
+{
+	struct auo_pixcir_ts *ts = input_get_drvdata(dev);
+	int ret;
+
+	ret = auo_pixcir_start(ts);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static void auo_pixcir_input_close(struct input_dev *dev)
+{
+	struct auo_pixcir_ts *ts = input_get_drvdata(dev);
+
+	auo_pixcir_stop(ts);
+
+	return;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int auo_pixcir_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct auo_pixcir_ts *ts = i2c_get_clientdata(client);
+	struct input_dev *input = ts->input;
+	int ret = 0;
+
+	mutex_lock(&input->mutex);
+
+	/* when configured as wakeup source, device should always wake system
+	 * therefore start device if necessary
+	 */
+	if (device_may_wakeup(&client->dev)) {
+		/* need to start device if not open, to be wakeup source */
+		if (!input->users) {
+			ret = auo_pixcir_start(ts);
+			if (ret)
+				goto unlock;
+		}
+
+		enable_irq_wake(client->irq);
+		ret = auo_pixcir_power_mode(ts, AUO_PIXCIR_POWER_SLEEP);
+	} else if (input->users) {
+		ret = auo_pixcir_stop(ts);
+	}
+
+unlock:
+	mutex_unlock(&input->mutex);
+
+	return ret;
+}
+
+static int auo_pixcir_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct auo_pixcir_ts *ts = i2c_get_clientdata(client);
+	struct input_dev *input = ts->input;
+	int ret = 0;
+
+	mutex_lock(&input->mutex);
+
+	if (device_may_wakeup(&client->dev)) {
+		disable_irq_wake(client->irq);
+
+		/* need to stop device if it was not open on suspend */
+		if (!input->users) {
+			ret = auo_pixcir_stop(ts);
+			if (ret)
+				goto unlock;
+		}
+
+		/* device wakes automatically from SLEEP */
+	} else if (input->users) {
+		ret = auo_pixcir_start(ts);
+	}
+
+unlock:
+	mutex_unlock(&input->mutex);
+
+	return ret;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(auo_pixcir_pm_ops, auo_pixcir_suspend,
+			 auo_pixcir_resume);
+
+static int __devinit auo_pixcir_probe(struct i2c_client *client,
+				      const struct i2c_device_id *id)
+{
+	const struct auo_pixcir_ts_platdata *pdata = client->dev.platform_data;
+	struct auo_pixcir_ts *ts;
+	struct input_dev *input_dev;
+	int ret;
+
+	if (!pdata)
+		return -EINVAL;
+
+	ts = kzalloc(sizeof(struct auo_pixcir_ts), GFP_KERNEL);
+	if (!ts)
+		return -ENOMEM;
+
+	ret = gpio_request(pdata->gpio_int, "auo_pixcir_ts_int");
+	if (ret) {
+		dev_err(&client->dev, "request of gpio %d failed, %d\n",
+			pdata->gpio_int, ret);
+		goto err_gpio_int;
+	}
+
+	if (pdata->init_hw)
+		pdata->init_hw(client);
+
+	ts->client = client;
+	ts->touch_ind_mode = 0;
+	init_waitqueue_head(&ts->wait);
+
+	snprintf(ts->phys, sizeof(ts->phys),
+		 "%s/input0", dev_name(&client->dev));
+
+	input_dev = input_allocate_device();
+	if (!input_dev) {
+		dev_err(&client->dev, "could not allocate input device\n");
+		goto err_input_alloc;
+	}
+
+	ts->input = input_dev;
+
+	input_dev->name = "AUO-Pixcir touchscreen";
+	input_dev->phys = ts->phys;
+	input_dev->id.bustype = BUS_I2C;
+	input_dev->dev.parent = &client->dev;
+
+	input_dev->open = auo_pixcir_input_open;
+	input_dev->close = auo_pixcir_input_close;
+
+	__set_bit(EV_ABS, input_dev->evbit);
+	__set_bit(EV_KEY, input_dev->evbit);
+
+	__set_bit(BTN_TOUCH, input_dev->keybit);
+
+	/* For single touch */
+	input_set_abs_params(input_dev, ABS_X, 0, pdata->x_max, 0, 0);
+	input_set_abs_params(input_dev, ABS_Y, 0, pdata->y_max, 0, 0);
+
+	/* For multi touch */
+	input_set_abs_params(input_dev, ABS_MT_POSITION_X, 0,
+			     pdata->x_max, 0, 0);
+	input_set_abs_params(input_dev, ABS_MT_POSITION_Y, 0,
+			     pdata->y_max, 0, 0);
+	input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0,
+			     AUO_PIXCIR_MAX_AREA, 0, 0);
+	input_set_abs_params(input_dev, ABS_MT_TOUCH_MINOR, 0,
+			     AUO_PIXCIR_MAX_AREA, 0, 0);
+	input_set_abs_params(input_dev, ABS_MT_ORIENTATION, 0, 1, 0, 0);
+
+	ret = i2c_smbus_read_byte_data(client, AUO_PIXCIR_REG_VERSION);
+	if (ret < 0)
+		goto err_fw_vers;
+	dev_info(&client->dev, "firmware version 0x%X\n", ret);
+
+	ret = auo_pixcir_int_config(ts, pdata->int_setting);
+	if (ret)
+		goto err_fw_vers;
+
+	input_set_drvdata(ts->input, ts);
+	ts->stopped = true;
+
+	ret = request_threaded_irq(client->irq, NULL, auo_pixcir_interrupt,
+				   IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+				   input_dev->name, ts);
+	if (ret) {
+		dev_err(&client->dev, "irq %d requested failed\n", client->irq);
+		goto err_fw_vers;
+	}
+
+	/* stop device and put it into deep sleep until it is opened */
+	ret = auo_pixcir_stop(ts);
+	if (ret < 0)
+		goto err_input_register;
+
+	ret = input_register_device(input_dev);
+	if (ret) {
+		dev_err(&client->dev, "could not register input device\n");
+		goto err_input_register;
+	}
+
+	i2c_set_clientdata(client, ts);
+
+	return 0;
+
+err_input_register:
+	free_irq(client->irq, ts);
+err_fw_vers:
+	input_free_device(input_dev);
+err_input_alloc:
+	if (pdata->exit_hw)
+		pdata->exit_hw(client);
+	gpio_free(pdata->gpio_int);
+err_gpio_int:
+	kfree(ts);
+
+	return ret;
+}
+
+static int __devexit auo_pixcir_remove(struct i2c_client *client)
+{
+	struct auo_pixcir_ts *ts = i2c_get_clientdata(client);
+	const struct auo_pixcir_ts_platdata *pdata = client->dev.platform_data;
+
+	free_irq(client->irq, ts);
+
+	input_unregister_device(ts->input);
+
+	if (pdata->exit_hw)
+		pdata->exit_hw(client);
+
+	gpio_free(pdata->gpio_int);
+
+	kfree(ts);
+
+	return 0;
+}
+
+static const struct i2c_device_id auo_pixcir_idtable[] = {
+	{ "auo_pixcir_ts", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, auo_pixcir_idtable);
+
+static struct i2c_driver auo_pixcir_driver = {
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "auo_pixcir_ts",
+		.pm	= &auo_pixcir_pm_ops,
+	},
+	.probe		= auo_pixcir_probe,
+	.remove		= __devexit_p(auo_pixcir_remove),
+	.id_table	= auo_pixcir_idtable,
+};
+
+static int __init auo_pixcir_init(void)
+{
+	return i2c_add_driver(&auo_pixcir_driver);
+}
+module_init(auo_pixcir_init);
+
+static void __exit auo_pixcir_exit(void)
+{
+	i2c_del_driver(&auo_pixcir_driver);
+}
+module_exit(auo_pixcir_exit);
+
+MODULE_DESCRIPTION("AUO-PIXCIR touchscreen driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Heiko Stuebner <heiko@sntech.de>");
diff --git a/include/linux/input/auo-pixcir-ts.h b/include/linux/input/auo-pixcir-ts.h
new file mode 100644
index 000000000000..75d4be717714
--- /dev/null
+++ b/include/linux/input/auo-pixcir-ts.h
@@ -0,0 +1,56 @@
+/*
+ * Driver for AUO in-cell touchscreens
+ *
+ * Copyright (c) 2011 Heiko Stuebner <heiko@sntech.de>
+ *
+ * based on auo_touch.h from Dell Streak kernel
+ *
+ * Copyright (c) 2008 QUALCOMM Incorporated.
+ * Copyright (c) 2008 QUALCOMM USA, INC.
+ *
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __AUO_PIXCIR_TS_H__
+#define __AUO_PIXCIR_TS_H__
+
+/*
+ * Interrupt modes:
+ * periodical:		interrupt is asserted periodically
+ * compare coordinates:	interrupt is asserted when coordinates change
+ * indicate touch:	interrupt is asserted during touch
+ */
+#define AUO_PIXCIR_INT_PERIODICAL	0x00
+#define AUO_PIXCIR_INT_COMP_COORD	0x01
+#define AUO_PIXCIR_INT_TOUCH_IND	0x02
+
+/*
+ * @gpio_int		interrupt gpio
+ * @int_setting		one of AUO_PIXCIR_INT_*
+ * @init_hw		hardware-specific init
+ * @exit_hw		hardware-specific shutdown
+ * @x_max		x-resolution
+ * @y_max		y-resolution
+ */
+struct auo_pixcir_ts_platdata {
+	int gpio_int;
+
+	int int_setting;
+
+	void (*init_hw)(struct i2c_client *);
+	void (*exit_hw)(struct i2c_client *);
+
+	unsigned int x_max;
+	unsigned int y_max;
+};
+
+#endif
-- 
cgit v1.2.3


From 1ba37268cd19e5a2a80924bfe8618bf1ba3e8249 Mon Sep 17 00:00:00 2001
From: Yongqiang Yang <xiaoqiangnk@gmail.com>
Date: Wed, 28 Dec 2011 17:46:46 -0500
Subject: jbd2: clear revoked flag on buffers before a new transaction started

Currently, we clear revoked flag only when a block is reused.  However,
this can trigger a false journal error.  Consider a situation when a block
is used as a meta block and is deleted (revoked) in ordered mode, then the
block is allocated as a data block to a file.  At this moment, user changes
the file's journal mode from ordered to journaled and truncates the file.
The block will be considered re-revoked by journal because it has revoked
flag still pending from the last transaction and an assertion triggers.

We fix the problem by keeping the revoked status more up to date - we clear
the revoked flag when switching revoke tables to reflect that there are no
revoked buffers in the current transaction any more.

Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/commit.c     |  6 ++++++
 fs/jbd2/revoke.c     | 34 ++++++++++++++++++++++++++++++++++
 include/linux/jbd2.h |  1 +
 3 files changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 68d704db787f..5069b8475150 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -429,6 +429,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
 	jbd_debug(3, "JBD2: commit phase 1\n");
 
+	/*
+	 * Clear revoked flag to reflect there are no revoked buffers
+	 * in the next transaction which is going to be started.
+	 */
+	jbd2_clear_buffer_revoked_flags(journal);
+
 	/*
 	 * Switch to a new revoke table.
 	 */
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 69fd93588118..30b2867d6cc9 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -47,6 +47,10 @@
  *   overwriting the new data.  We don't even need to clear the revoke
  *   bit here.
  *
+ * We cache revoke status of a buffer in the current transaction in b_states
+ * bits.  As the name says, revokevalid flag indicates that the cached revoke
+ * status of a buffer is valid and we can rely on the cached status.
+ *
  * Revoke information on buffers is a tri-state value:
  *
  * RevokeValid clear:	no cached revoke status, need to look it up
@@ -478,6 +482,36 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
 	return did_revoke;
 }
 
+/*
+ * jbd2_clear_buffer_revoked_flags clears the revoked flag of buffers in
+ * the revoke table to reflect that there are no revoked buffers in the
+ * next transaction which is going to be started.
+ */
+void jbd2_clear_buffer_revoked_flags(journal_t *journal)
+{
+	struct jbd2_revoke_table_s *revoke = journal->j_revoke;
+	int i = 0;
+
+	for (i = 0; i < revoke->hash_size; i++) {
+		struct list_head *hash_list;
+		struct list_head *list_entry;
+		hash_list = &revoke->hash_table[i];
+
+		list_for_each(list_entry, hash_list) {
+			struct jbd2_revoke_record_s *record;
+			struct buffer_head *bh;
+			record = (struct jbd2_revoke_record_s *)list_entry;
+			bh = __find_get_block(journal->j_fs_dev,
+					      record->blocknr,
+					      journal->j_blocksize);
+			if (bh) {
+				clear_buffer_revoked(bh);
+				__brelse(bh);
+			}
+		}
+	}
+}
+
 /* journal_switch_revoke table select j_revoke for next transaction
  * we do not want to suspend any processing until all revokes are
  * written -bzzz
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 2092ea21e469..5557baefed60 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1151,6 +1151,7 @@ extern int	jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
 extern int	jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t);
 extern void	jbd2_journal_clear_revoke(journal_t *);
 extern void	jbd2_journal_switch_revoke_table(journal_t *journal);
+extern void	jbd2_clear_buffer_revoked_flags(journal_t *journal);
 
 /*
  * The log thread user interface:
-- 
cgit v1.2.3


From cc1d327232759647ea56725eab1c6b16c92d52fa Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Mon, 14 Nov 2011 08:48:18 -0300
Subject: [media] v4l: Add new alpha component control

The V4L2_CID_ALPHA_COMPONENT control is intended for the video capture
or memory-to-memory devices that are capable of setting up the per-pixel
alpha component to some arbitrary value. It allows to set the alpha
component for all pixels to an arbitrary value.

Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/v4l/compat.xml         | 11 ++++++++++
 Documentation/DocBook/media/v4l/controls.xml       | 25 ++++++++++++++++------
 .../DocBook/media/v4l/pixfmt-packed-rgb.xml        |  7 ++++--
 drivers/media/video/v4l2-ctrls.c                   |  1 +
 include/linux/videodev2.h                          |  6 +++---
 5 files changed, 39 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml
index 8b44a43f4542..12ba26262d32 100644
--- a/Documentation/DocBook/media/v4l/compat.xml
+++ b/Documentation/DocBook/media/v4l/compat.xml
@@ -2379,6 +2379,17 @@ that used it. It was originally scheduled for removal in 2.6.35.
       </orderedlist>
     </section>
 
+    <section>
+      <title>V4L2 in Linux 3.3</title>
+      <orderedlist>
+        <listitem>
+	  <para>Added <constant>V4L2_CID_ALPHA_COMPONENT</constant> control
+	    to the <link linkend="control">User controls class</link>.
+	  </para>
+        </listitem>
+      </orderedlist>
+    </section>
+
     <section id="other">
       <title>Relation of V4L2 to other Linux multimedia APIs</title>
 
diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml
index c0422c622337..a1be37897ad7 100644
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -323,12 +323,6 @@ minimum value disables backlight compensation.</entry>
 	    <entry>Switch on or off the illuminator 1 or 2 of the device
 		(usually a microscope).</entry>
 	  </row>
-	  <row>
-	    <entry><constant>V4L2_CID_LASTP1</constant></entry>
-	    <entry></entry>
-	    <entry>End of the predefined control IDs (currently
-<constant>V4L2_CID_ILLUMINATORS_2</constant> + 1).</entry>
-	  </row>
 	  <row>
 	    <entry><constant>V4L2_CID_MIN_BUFFERS_FOR_CAPTURE</constant></entry>
 	    <entry>integer</entry>
@@ -345,6 +339,25 @@ and used as a hint to determine the number of OUTPUT buffers to pass to REQBUFS.
 The value is the minimum number of OUTPUT buffers that is necessary for hardware
 to work.</entry>
 	  </row>
+	  <row id="v4l2-alpha-component">
+	    <entry><constant>V4L2_CID_ALPHA_COMPONENT</constant></entry>
+	    <entry>integer</entry>
+	    <entry> Sets the alpha color component on the capture device or on
+	    the capture buffer queue of a mem-to-mem device. When a mem-to-mem
+	    device produces a frame format that includes an alpha component
+	    (e.g. <link linkend="rgb-formats">packed RGB image formats</link>)
+	    and the alpha value is not defined by the mem-to-mem input data
+	    this control lets you select the alpha component value of all
+	    pixels. It is applicable to any pixel format that contains an alpha
+	    component.
+	    </entry>
+	  </row>
+	  <row>
+	    <entry><constant>V4L2_CID_LASTP1</constant></entry>
+	    <entry></entry>
+	    <entry>End of the predefined control IDs (currently
+	      <constant>V4L2_CID_ALPHA_COMPONENT</constant> + 1).</entry>
+	  </row>
 	  <row>
 	    <entry><constant>V4L2_CID_PRIVATE_BASE</constant></entry>
 	    <entry></entry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml b/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml
index ba56536622f2..166c8d65e4f7 100644
--- a/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml
@@ -428,8 +428,11 @@ colorspace <constant>V4L2_COLORSPACE_SRGB</constant>.</para>
     <para>Bit 7 is the most significant bit. The value of a = alpha
 bits is undefined when reading from the driver, ignored when writing
 to the driver, except when alpha blending has been negotiated for a
-<link linkend="overlay">Video Overlay</link> or <link
-linkend="osd">Video Output Overlay</link>.</para>
+<link linkend="overlay">Video Overlay</link> or <link linkend="osd">
+Video Output Overlay</link> or when alpha component has been configured
+for a <link linkend="capture">Video Capture</link> by means of <link
+linkend="v4l2-alpha-component"> <constant>V4L2_CID_ALPHA_COMPONENT
+</constant> </link> control.</para>
 
     <example>
       <title><constant>V4L2_PIX_FMT_BGR24</constant> 4 &times; 4 pixel
diff --git a/drivers/media/video/v4l2-ctrls.c b/drivers/media/video/v4l2-ctrls.c
index 0f415dade05a..39266153499f 100644
--- a/drivers/media/video/v4l2-ctrls.c
+++ b/drivers/media/video/v4l2-ctrls.c
@@ -467,6 +467,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_ILLUMINATORS_2:		return "Illuminator 2";
 	case V4L2_CID_MIN_BUFFERS_FOR_CAPTURE:	return "Minimum Number of Capture Buffers";
 	case V4L2_CID_MIN_BUFFERS_FOR_OUTPUT:	return "Minimum Number of Output Buffers";
+	case V4L2_CID_ALPHA_COMPONENT:		return "Alpha Component";
 
 	/* MPEG controls */
 	/* Keep the order of the 'case's the same as in videodev2.h! */
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 3d62631839bc..2965906a02c9 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1204,10 +1204,10 @@ enum v4l2_colorfx {
 #define V4L2_CID_MIN_BUFFERS_FOR_CAPTURE	(V4L2_CID_BASE+39)
 #define V4L2_CID_MIN_BUFFERS_FOR_OUTPUT		(V4L2_CID_BASE+40)
 
-/* last CID + 1 */
-#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+41)
+#define V4L2_CID_ALPHA_COMPONENT		(V4L2_CID_BASE+41)
 
-/* Minimum number of buffer neede by the device */
+/* last CID + 1 */
+#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+42)
 
 /*  MPEG-class control IDs defined by V4L2 */
 #define V4L2_CID_MPEG_BASE 			(V4L2_CTRL_CLASS_MPEG | 0x900)
-- 
cgit v1.2.3


From 36a281e25276f2d138bbbca4170d11453323cce1 Mon Sep 17 00:00:00 2001
From: Jianchun Bian <jcbian@pixcir.com.cn>
Date: Fri, 30 Dec 2011 15:16:21 -0800
Subject: Input: add driver for pixcir i2c touchscreens

This patch adds a driver for PIXCIR's I2C connected touchscreens.

Signed-off-by: Jianchun <jcbian@pixcir.com.cn>
Acked-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/Kconfig         |  12 ++
 drivers/input/touchscreen/Makefile        |   1 +
 drivers/input/touchscreen/pixcir_i2c_ts.c | 239 ++++++++++++++++++++++++++++++
 include/linux/input/pixcir_ts.h           |  10 ++
 4 files changed, 262 insertions(+)
 create mode 100644 drivers/input/touchscreen/pixcir_i2c_ts.c
 create mode 100644 include/linux/input/pixcir_ts.h

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index a121e36e5a47..4af2a18eb3ba 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -458,6 +458,18 @@ config TOUCHSCREEN_UCB1400
 	  To compile this driver as a module, choose M here: the
 	  module will be called ucb1400_ts.
 
+config TOUCHSCREEN_PIXCIR
+	tristate "PIXCIR I2C touchscreens"
+	depends on I2C
+	help
+	  Say Y here if you have a pixcir i2c touchscreen
+	  controller.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called pixcir_i2c_ts.
+
 config TOUCHSCREEN_WM831X
 	tristate "Support for WM831x touchscreen controllers"
 	depends on MFD_WM831X
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index f0b4d16d39a6..496091e88460 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_TOUCHSCREEN_HTCPEN)	+= htcpen.o
 obj-$(CONFIG_TOUCHSCREEN_USB_COMPOSITE)	+= usbtouchscreen.o
 obj-$(CONFIG_TOUCHSCREEN_PCAP)		+= pcap_ts.o
 obj-$(CONFIG_TOUCHSCREEN_PENMOUNT)	+= penmount.o
+obj-$(CONFIG_TOUCHSCREEN_PIXCIR)	+= pixcir_i2c_ts.o
 obj-$(CONFIG_TOUCHSCREEN_S3C2410)	+= s3c2410_ts.o
 obj-$(CONFIG_TOUCHSCREEN_ST1232)	+= st1232.o
 obj-$(CONFIG_TOUCHSCREEN_STMPE)		+= stmpe-ts.o
diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c
new file mode 100644
index 000000000000..d5ac09a1ee56
--- /dev/null
+++ b/drivers/input/touchscreen/pixcir_i2c_ts.c
@@ -0,0 +1,239 @@
+/*
+ * Driver for Pixcir I2C touchscreen controllers.
+ *
+ * Copyright (C) 2010-2011 Pixcir, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/input/pixcir_ts.h>
+
+struct pixcir_i2c_ts_data {
+	struct i2c_client *client;
+	struct input_dev *input;
+	const struct pixcir_ts_platform_data *chip;
+	bool exiting;
+};
+
+static void pixcir_ts_poscheck(struct pixcir_i2c_ts_data *data)
+{
+	struct pixcir_i2c_ts_data *tsdata = data;
+	u8 rdbuf[10], wrbuf[1] = { 0 };
+	u8 touch;
+	int ret;
+
+	ret = i2c_master_send(tsdata->client, wrbuf, sizeof(wrbuf));
+	if (ret != sizeof(wrbuf)) {
+		dev_err(&tsdata->client->dev,
+			"%s: i2c_master_send failed(), ret=%d\n",
+			__func__, ret);
+		return;
+	}
+
+	ret = i2c_master_recv(tsdata->client, rdbuf, sizeof(rdbuf));
+	if (ret != sizeof(rdbuf)) {
+		dev_err(&tsdata->client->dev,
+			"%s: i2c_master_recv failed(), ret=%d\n",
+			__func__, ret);
+		return;
+	}
+
+	touch = rdbuf[0];
+	if (touch) {
+		u16 posx1 = (rdbuf[3] << 8) | rdbuf[2];
+		u16 posy1 = (rdbuf[5] << 8) | rdbuf[4];
+		u16 posx2 = (rdbuf[7] << 8) | rdbuf[6];
+		u16 posy2 = (rdbuf[9] << 8) | rdbuf[8];
+
+		input_report_key(tsdata->input, BTN_TOUCH, 1);
+		input_report_abs(tsdata->input, ABS_X, posx1);
+		input_report_abs(tsdata->input, ABS_Y, posy1);
+
+		input_report_abs(tsdata->input, ABS_MT_POSITION_X, posx1);
+		input_report_abs(tsdata->input, ABS_MT_POSITION_Y, posy1);
+		input_mt_sync(tsdata->input);
+
+		if (touch == 2) {
+			input_report_abs(tsdata->input,
+					 ABS_MT_POSITION_X, posx2);
+			input_report_abs(tsdata->input,
+					 ABS_MT_POSITION_Y, posy2);
+			input_mt_sync(tsdata->input);
+		}
+	} else {
+		input_report_key(tsdata->input, BTN_TOUCH, 0);
+	}
+
+	input_sync(tsdata->input);
+}
+
+static irqreturn_t pixcir_ts_isr(int irq, void *dev_id)
+{
+	struct pixcir_i2c_ts_data *tsdata = dev_id;
+
+	while (!tsdata->exiting) {
+		pixcir_ts_poscheck(tsdata);
+
+		if (tsdata->chip->attb_read_val())
+			break;
+
+		msleep(20);
+	}
+
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int pixcir_i2c_ts_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	if (device_may_wakeup(&client->dev))
+		enable_irq_wake(client->irq);
+
+	return 0;
+}
+
+static int pixcir_i2c_ts_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+
+	if (device_may_wakeup(&client->dev))
+		disable_irq_wake(client->irq);
+
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(pixcir_dev_pm_ops,
+			 pixcir_i2c_ts_suspend, pixcir_i2c_ts_resume);
+
+static int __devinit pixcir_i2c_ts_probe(struct i2c_client *client,
+					 const struct i2c_device_id *id)
+{
+	const struct pixcir_ts_platform_data *pdata = client->dev.platform_data;
+	struct pixcir_i2c_ts_data *tsdata;
+	struct input_dev *input;
+	int error;
+
+	if (!pdata) {
+		dev_err(&client->dev, "platform data not defined\n");
+		return -EINVAL;
+	}
+
+	tsdata = kzalloc(sizeof(*tsdata), GFP_KERNEL);
+	input = input_allocate_device();
+	if (!tsdata || !input) {
+		dev_err(&client->dev, "Failed to allocate driver data!\n");
+		error = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	tsdata->client = client;
+	tsdata->input = input;
+	tsdata->chip = pdata;
+
+	input->name = client->name;
+	input->id.bustype = BUS_I2C;
+	input->dev.parent = &client->dev;
+
+	__set_bit(EV_KEY, input->evbit);
+	__set_bit(EV_ABS, input->evbit);
+	__set_bit(BTN_TOUCH, input->keybit);
+	input_set_abs_params(input, ABS_X, 0, pdata->x_max, 0, 0);
+	input_set_abs_params(input, ABS_Y, 0, pdata->y_max, 0, 0);
+	input_set_abs_params(input, ABS_MT_POSITION_X, 0, pdata->x_max, 0, 0);
+	input_set_abs_params(input, ABS_MT_POSITION_Y, 0, pdata->y_max, 0, 0);
+
+	input_set_drvdata(input, tsdata);
+
+	error = request_threaded_irq(client->irq, NULL, pixcir_ts_isr,
+				     IRQF_TRIGGER_FALLING,
+				     client->name, tsdata);
+	if (error) {
+		dev_err(&client->dev, "Unable to request touchscreen IRQ.\n");
+		goto err_free_mem;
+	}
+
+	error = input_register_device(input);
+	if (error)
+		goto err_free_irq;
+
+	i2c_set_clientdata(client, tsdata);
+	device_init_wakeup(&client->dev, 1);
+
+	return 0;
+
+err_free_irq:
+	free_irq(client->irq, tsdata);
+err_free_mem:
+	input_free_device(input);
+	kfree(tsdata);
+	return error;
+}
+
+static int __devexit pixcir_i2c_ts_remove(struct i2c_client *client)
+{
+	struct pixcir_i2c_ts_data *tsdata = i2c_get_clientdata(client);
+
+	device_init_wakeup(&client->dev, 0);
+
+	tsdata->exiting = true;
+	mb();
+	free_irq(client->irq, tsdata);
+
+	input_unregister_device(tsdata->input);
+	kfree(tsdata);
+
+	return 0;
+}
+
+static const struct i2c_device_id pixcir_i2c_ts_id[] = {
+	{ "pixcir_ts", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, pixcir_i2c_ts_id);
+
+static struct i2c_driver pixcir_i2c_ts_driver = {
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "pixcir_ts",
+		.pm	= &pixcir_dev_pm_ops,
+	},
+	.probe		= pixcir_i2c_ts_probe,
+	.remove		= __devexit_p(pixcir_i2c_ts_remove),
+	.id_table	= pixcir_i2c_ts_id,
+};
+
+static int __init pixcir_i2c_ts_init(void)
+{
+	return i2c_add_driver(&pixcir_i2c_ts_driver);
+}
+module_init(pixcir_i2c_ts_init);
+
+static void __exit pixcir_i2c_ts_exit(void)
+{
+	i2c_del_driver(&pixcir_i2c_ts_driver);
+}
+module_exit(pixcir_i2c_ts_exit);
+
+MODULE_AUTHOR("Jianchun Bian <jcbian@pixcir.com.cn>, Dequan Meng <dqmeng@pixcir.com.cn>");
+MODULE_DESCRIPTION("Pixcir I2C Touchscreen Driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/input/pixcir_ts.h b/include/linux/input/pixcir_ts.h
new file mode 100644
index 000000000000..7163d91c0373
--- /dev/null
+++ b/include/linux/input/pixcir_ts.h
@@ -0,0 +1,10 @@
+#ifndef	_PIXCIR_I2C_TS_H
+#define	_PIXCIR_I2C_TS_H
+
+struct pixcir_ts_platform_data {
+	int (*attb_read_val)(void);
+	int x_max;
+	int y_max;
+};
+
+#endif
-- 
cgit v1.2.3


From 8de8594a79ae43b08d115c94f09373f6c673f202 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 26 Dec 2011 20:22:50 -0300
Subject: [media] dvb-core: be sure that drivers won't use DVBv3 internally

Now that all frontends are implementing DVBv5, don't export the
DVBv3 specific stuff to the drivers. Only the core should be
aware of that, as it will keep providing DVBv3 backward compatibility.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb/dvb-core/dvb_frontend.c | 3 +++
 drivers/media/dvb/dvb-core/dvb_frontend.h | 2 ++
 include/linux/dvb/frontend.h              | 6 ++++--
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c
index b1ab866743fd..55ca5521bca6 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.c
@@ -25,6 +25,9 @@
  * Or, point your browser to http://www.gnu.org/copyleft/gpl.html
  */
 
+/* Enables DVBv3 compatibility bits at the headers */
+#define __DVB_CORE__
+
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.h b/drivers/media/dvb/dvb-core/dvb_frontend.h
index 93715d6755f4..676481c8ad78 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.h
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.h
@@ -315,6 +315,7 @@ struct dvb_frontend_ops {
 	int (*get_property)(struct dvb_frontend* fe, struct dtv_property* tvp);
 };
 
+#ifdef __DVB_CORE__
 #define MAX_EVENT 8
 
 struct dvb_fe_events {
@@ -325,6 +326,7 @@ struct dvb_fe_events {
 	wait_queue_head_t	  wait_queue;
 	struct mutex		  mtx;
 };
+#endif
 
 struct dtv_frontend_properties {
 
diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index a3c762383f88..7e7cb64f56d8 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -181,6 +181,7 @@ typedef enum fe_transmit_mode {
 	TRANSMISSION_MODE_32K,
 } fe_transmit_mode_t;
 
+#if defined(__DVB_CORE__) || !defined (__KERNEL__)
 typedef enum fe_bandwidth {
 	BANDWIDTH_8_MHZ,
 	BANDWIDTH_7_MHZ,
@@ -190,7 +191,7 @@ typedef enum fe_bandwidth {
 	BANDWIDTH_10_MHZ,
 	BANDWIDTH_1_712_MHZ,
 } fe_bandwidth_t;
-
+#endif
 
 typedef enum fe_guard_interval {
 	GUARD_INTERVAL_1_32,
@@ -213,6 +214,7 @@ typedef enum fe_hierarchy {
 } fe_hierarchy_t;
 
 
+#if defined(__DVB_CORE__) || !defined (__KERNEL__)
 struct dvb_qpsk_parameters {
 	__u32		symbol_rate;  /* symbol rate in Symbols per second */
 	fe_code_rate_t	fec_inner;    /* forward error correction (see above) */
@@ -251,11 +253,11 @@ struct dvb_frontend_parameters {
 	} u;
 };
 
-
 struct dvb_frontend_event {
 	fe_status_t status;
 	struct dvb_frontend_parameters parameters;
 };
+#endif
 
 /* S2API Commands */
 #define DTV_UNDEFINED		0
-- 
cgit v1.2.3


From 3bb3dbbd56ea39e5537db8f8041ea95d28f16a7f Mon Sep 17 00:00:00 2001
From: Donggeun Kim <dg77.kim@samsung.com>
Date: Tue, 27 Dec 2011 18:47:48 +0900
Subject: power_supply: Add initial Charger-Manager driver

Because battery health monitoring should be done even when suspended,
it needs to wake up and suspend periodically. Thus, userspace battery
monitoring may incur too much overhead; every device and task is woken
up periodically. Charger Manager uses suspend-again to provide
in-suspend monitoring.

This patch allows to monitor battery health in-suspend state.

Signed-off-by: Donggeun Kim <dg77.kim@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 Documentation/power/charger-manager.txt | 149 ++++++
 drivers/power/Kconfig                   |  10 +
 drivers/power/Makefile                  |   1 +
 drivers/power/charger-manager.c         | 779 ++++++++++++++++++++++++++++++++
 include/linux/power/charger-manager.h   | 130 ++++++
 5 files changed, 1069 insertions(+)
 create mode 100644 Documentation/power/charger-manager.txt
 create mode 100644 drivers/power/charger-manager.c
 create mode 100644 include/linux/power/charger-manager.h

(limited to 'include/linux')

diff --git a/Documentation/power/charger-manager.txt b/Documentation/power/charger-manager.txt
new file mode 100644
index 000000000000..081489f3db25
--- /dev/null
+++ b/Documentation/power/charger-manager.txt
@@ -0,0 +1,149 @@
+Charger Manager
+	(C) 2011 MyungJoo Ham <myungjoo.ham@samsung.com>, GPL
+
+Charger Manager provides in-kernel battery charger management that
+requires temperature monitoring during suspend-to-RAM state
+and where each battery may have multiple chargers attached and the userland
+wants to look at the aggregated information of the multiple chargers.
+
+Charger Manager is a platform_driver with power-supply-class entries.
+An instance of Charger Manager (a platform-device created with Charger-Manager)
+represents an independent battery with chargers. If there are multiple
+batteries with their own chargers acting independently in a system,
+the system may need multiple instances of Charger Manager.
+
+1. Introduction
+===============
+
+Charger Manager supports the following:
+
+* Support for multiple chargers (e.g., a device with USB, AC, and solar panels)
+	A system may have multiple chargers (or power sources) and some of
+	them may be activated at the same time. Each charger may have its
+	own power-supply-class and each power-supply-class can provide
+	different information about the battery status. This framework
+	aggregates charger-related information from multiple sources and
+	shows combined information as a single power-supply-class.
+
+* Support for in suspend-to-RAM polling (with suspend_again callback)
+	While the battery is being charged and the system is in suspend-to-RAM,
+	we may need to monitor the battery health by looking at the ambient or
+	battery temperature. We can accomplish this by waking up the system
+	periodically. However, such a method wakes up devices unnecessary for
+	monitoring the battery health, as well as tasks and user processes that
+	are supposed to be kept suspended. That, in turn, incurs unnecessary
+	power consumption and slows down the charging process. Such peak power
+	consumption can even stop chargers in the middle of charging
+	(external power input < device power consumption), which not
+	only affects the charging time, but also the lifespan of the battery.
+
+	Charger Manager provides a function "cm_suspend_again" that can be
+	used as suspend_again callback of platform_suspend_ops. If the platform
+	requires tasks other than cm_suspend_again, it may implement its own
+	suspend_again callback that calls cm_suspend_again in the middle.
+	Normally, the platform will need to resume and suspend some devices
+	that are used by Charger Manager.
+
+2. Global Charger-Manager Data related with suspend_again
+========================================================
+In order to setup Charger Manager with suspend-again feature
+(in-suspend monitoring), the user should provide charger_global_desc
+with setup_charger_manager(struct charger_global_desc *).
+This charger_global_desc data for in-suspend monitoring is global
+as the name suggests. Thus, the user needs to provide only once even
+if there are multiple batteries. If there are multiple batteries, the
+multiple instances of Charger Manager share the same charger_global_desc
+and it will manage in-suspend monitoring for all instances of Charger Manager.
+
+The user needs to provide both of the following entries properly in order
+to activate in-suspend monitoring:
+
+struct charger_global_desc {
+
+char *rtc_name;
+	: The name of rtc (e.g., "rtc0") used to wakeup the system from
+	suspend for Charger Manager. The alarm interrupt (AIE) of the rtc
+	should be able to wake up the system from suspend. Charger Manager
+	saves and restores the alarm value and uses the previously-defined
+	alarm if it is going to go off earlier than Charger Manager so that
+	Charger Manager does not interfere with previously-defined alarms.
+
+bool (*rtc_only_wakeup)(void);
+	: This callback should let CM know whether
+	the wakeup-from-suspend is caused only by the alarm of "rtc" in the
+	same struct. If any other wakeup source triggered the
+	wakeup, it should return false. If the "rtc" is the only wakeup
+	reason, it should return true.
+};
+
+3. How to setup suspend_again
+=============================
+Charger Manager provides a function "extern bool cm_suspend_again(void)".
+When cm_suspend_again is called, it monitors every battery. The suspend_again
+callback of the system's platform_suspend_ops can call cm_suspend_again
+function to know whether Charger Manager wants to suspend again or not.
+If there are no other devices or tasks that want to use suspend_again
+feature, the platform_suspend_ops may directly refer to cm_suspend_again
+for its suspend_again callback.
+
+The cm_suspend_again() returns true (meaning "I want to suspend again")
+if the system was woken up by Charger Manager and the polling
+(in-suspend monitoring) results in "normal".
+
+4. Charger-Manager Data (struct charger_desc)
+=============================================
+For each battery charged independently from other batteries (if a series of
+batteries are charged by a single charger, they are counted as one independent
+battery), an instance of Charger Manager is attached to it.
+
+struct charger_desc {
+
+enum polling_modes polling_mode;
+	: CM_POLL_DISABLE: do not poll this battery.
+	  CM_POLL_ALWAYS: always poll this battery.
+	  CM_POLL_EXTERNAL_POWER_ONLY: poll this battery if and only if
+				       an external power source is attached.
+	  CM_POLL_CHARGING_ONLY: poll this battery if and only if the
+				 battery is being charged.
+
+unsigned int polling_interval_ms;
+	: Required polling interval in ms. Charger Manager will poll
+	this battery every polling_interval_ms or more frequently.
+
+enum data_source battery_present;
+	: CM_FUEL_GAUGE: get battery presence information from fuel gauge.
+	  CM_CHARGER_STAT: get battery presence from chargers.
+
+char **psy_charger_stat;
+	: An array ending with NULL that has power-supply-class names of
+	chargers. Each power-supply-class should provide "PRESENT" (if
+	battery_present is "CM_CHARGER_STAT"), "ONLINE" (shows whether an
+	external power source is attached or not), and "STATUS" (shows whether
+	the battery is {"FULL" or not FULL} or {"FULL", "Charging",
+	"Discharging", "NotCharging"}).
+
+int num_charger_regulators;
+struct regulator_bulk_data *charger_regulators;
+	: Regulators representing the chargers in the form for
+	regulator framework's bulk functions.
+
+char *psy_fuel_gauge;
+	: Power-supply-class name of the fuel gauge.
+
+int (*temperature_out_of_range)(int *mC);
+	: This callback returns 0 if the temperature is safe for charging,
+	a positive number if it is too hot to charge, and a negative number
+	if it is too cold to charge. With the variable mC, the callback returns
+	the temperature in units of 1/1000 degree centigrade.
+};
+
+5. Other Considerations
+=======================
+
+At the charger/battery-related events such as battery-pulled-out,
+charger-pulled-out, charger-inserted, DCIN-over/under-voltage, charger-stopped,
+and others critical to chargers, the system should be configured to wake up.
+At least the following should wake up the system from a suspend:
+a) charger-on/off b) external-power-in/out c) battery-in/out (while charging)
+
+It is usually accomplished by configuring the PMIC as a wakeup source.
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 57de051a74b3..363f4d1ae067 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -235,6 +235,16 @@ config CHARGER_GPIO
 	  This driver can be build as a module. If so, the module will be
 	  called gpio-charger.
 
+config CHARGER_MANAGER
+	bool "Battery charger manager for multiple chargers"
+	depends on REGULATOR && RTC_CLASS
+	help
+          Say Y to enable charger-manager support, which allows multiple
+          chargers attached to a battery and multiple batteries attached to a
+          system. The charger-manager also can monitor charging status in
+          runtime and in suspend-to-RAM by waking up the system periodically
+          with help of suspend_again support.
+
 config CHARGER_MAX8997
 	tristate "Maxim MAX8997/MAX8966 PMIC battery charger driver"
 	depends on MFD_MAX8997 && REGULATOR_MAX8997
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index b4af13dd8b66..d3b24e99acbe 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -36,5 +36,6 @@ obj-$(CONFIG_CHARGER_ISP1704)	+= isp1704_charger.o
 obj-$(CONFIG_CHARGER_MAX8903)	+= max8903_charger.o
 obj-$(CONFIG_CHARGER_TWL4030)	+= twl4030_charger.o
 obj-$(CONFIG_CHARGER_GPIO)	+= gpio-charger.o
+obj-$(CONFIG_CHARGER_MANAGER)	+= charger-manager.o
 obj-$(CONFIG_CHARGER_MAX8997)	+= max8997_charger.o
 obj-$(CONFIG_CHARGER_MAX8998)	+= max8998_charger.o
diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c
new file mode 100644
index 000000000000..727a259ea46c
--- /dev/null
+++ b/drivers/power/charger-manager.c
@@ -0,0 +1,779 @@
+/*
+ * Copyright (C) 2011 Samsung Electronics Co., Ltd.
+ * MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This driver monitors battery health and controls the charger
+ * during suspend-to-mem.
+ * Charger manager depends on other devices. Register this later than
+ * the devices it depends on.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+**/
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/rtc.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/platform_device.h>
+#include <linux/power/charger-manager.h>
+#include <linux/regulator/consumer.h>
+
+/*
+ * Regard CM_JIFFIES_SMALL jiffies is small enough to ignore for
+ * delayed works so that we can run delayed works with CM_JIFFIES_SMALL
+ * without any delays.
+ */
+#define	CM_JIFFIES_SMALL	(2)
+
+/* If y is valid (> 0) and less than x, do x = y. Args are re-evaluated. */
+#define CM_MIN_VALID(x, y)	((x) = ((((y) > 0) && ((x) > (y))) ? (y) : (x)))
+
+/*
+ * Regard CM_RTC_SMALL (sec) is small enough to ignore error in invoking
+ * rtc alarm. It should be 2 or larger
+ */
+#define CM_RTC_SMALL		(2)
+
+#define UEVENT_BUF_SIZE		32
+
+static LIST_HEAD(cm_list);
+static DEFINE_MUTEX(cm_list_mtx);
+
+/* About in-suspend (suspend-again) monitoring */
+static struct rtc_device *rtc_dev;
+/*
+ * Backup RTC alarm
+ * Save the wakeup alarm before entering suspend-to-RAM
+ */
+static struct rtc_wkalrm rtc_wkalarm_save;
+/* Backup RTC alarm time in terms of seconds since 01-01-1970 00:00:00 */
+static unsigned long rtc_wkalarm_save_time;
+static bool cm_suspended;
+static bool cm_rtc_set;
+static unsigned long cm_suspend_duration_ms;
+
+/* Global charger-manager description */
+static struct charger_global_desc *g_desc; /* init with setup_charger_manager */
+
+/**
+ * is_batt_present - See if the battery presents in place.
+ * @cm: the Charger Manager representing the battery.
+ */
+static bool is_batt_present(struct charger_manager *cm)
+{
+	union power_supply_propval val;
+	bool present = false;
+	int i, ret;
+
+	switch (cm->desc->battery_present) {
+	case CM_FUEL_GAUGE:
+		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+				POWER_SUPPLY_PROP_PRESENT, &val);
+		if (ret == 0 && val.intval)
+			present = true;
+		break;
+	case CM_CHARGER_STAT:
+		for (i = 0; cm->charger_stat[i]; i++) {
+			ret = cm->charger_stat[i]->get_property(
+					cm->charger_stat[i],
+					POWER_SUPPLY_PROP_PRESENT, &val);
+			if (ret == 0 && val.intval) {
+				present = true;
+				break;
+			}
+		}
+		break;
+	}
+
+	return present;
+}
+
+/**
+ * is_ext_pwr_online - See if an external power source is attached to charge
+ * @cm: the Charger Manager representing the battery.
+ *
+ * Returns true if at least one of the chargers of the battery has an external
+ * power source attached to charge the battery regardless of whether it is
+ * actually charging or not.
+ */
+static bool is_ext_pwr_online(struct charger_manager *cm)
+{
+	union power_supply_propval val;
+	bool online = false;
+	int i, ret;
+
+	/* If at least one of them has one, it's yes. */
+	for (i = 0; cm->charger_stat[i]; i++) {
+		ret = cm->charger_stat[i]->get_property(
+				cm->charger_stat[i],
+				POWER_SUPPLY_PROP_ONLINE, &val);
+		if (ret == 0 && val.intval) {
+			online = true;
+			break;
+		}
+	}
+
+	return online;
+}
+
+/**
+ * is_charging - Returns true if the battery is being charged.
+ * @cm: the Charger Manager representing the battery.
+ */
+static bool is_charging(struct charger_manager *cm)
+{
+	int i, ret;
+	bool charging = false;
+	union power_supply_propval val;
+
+	/* If there is no battery, it cannot be charged */
+	if (!is_batt_present(cm))
+		return false;
+
+	/* If at least one of the chargers is charging, return true */
+	for (i = 0; cm->charger_stat[i]; i++) {
+		/* 1. Charging must not be emergency-stopped or disabled */
+		if (cm->emergency_stop)
+			continue;
+		if (!cm->charger_enabled)
+			continue;
+
+		/* 2. The charger should be online (ext-power) */
+		ret = cm->charger_stat[i]->get_property(
+				cm->charger_stat[i],
+				POWER_SUPPLY_PROP_ONLINE, &val);
+		if (ret) {
+			dev_warn(cm->dev, "Cannot read ONLINE value from %s.\n",
+					cm->desc->psy_charger_stat[i]);
+			continue;
+		}
+		if (val.intval == 0)
+			continue;
+
+		/*
+		 * 3. The charger should not be FULL, DISCHARGING,
+		 * or NOT_CHARGING.
+		 */
+		ret = cm->charger_stat[i]->get_property(
+				cm->charger_stat[i],
+				POWER_SUPPLY_PROP_STATUS, &val);
+		if (ret) {
+			dev_warn(cm->dev, "Cannot read STATUS value from %s.\n",
+					cm->desc->psy_charger_stat[i]);
+			continue;
+		}
+		if (val.intval == POWER_SUPPLY_STATUS_FULL ||
+				val.intval == POWER_SUPPLY_STATUS_DISCHARGING ||
+				val.intval == POWER_SUPPLY_STATUS_NOT_CHARGING)
+			continue;
+
+		/* Then, this is charging. */
+		charging = true;
+		break;
+	}
+
+	return charging;
+}
+
+/**
+ * is_polling_required - Return true if need to continue polling for this CM.
+ * @cm: the Charger Manager representing the battery.
+ */
+static bool is_polling_required(struct charger_manager *cm)
+{
+	switch (cm->desc->polling_mode) {
+	case CM_POLL_DISABLE:
+		return false;
+	case CM_POLL_ALWAYS:
+		return true;
+	case CM_POLL_EXTERNAL_POWER_ONLY:
+		return is_ext_pwr_online(cm);
+	case CM_POLL_CHARGING_ONLY:
+		return is_charging(cm);
+	default:
+		dev_warn(cm->dev, "Incorrect polling_mode (%d)\n",
+			cm->desc->polling_mode);
+	}
+
+	return false;
+}
+
+/**
+ * try_charger_enable - Enable/Disable chargers altogether
+ * @cm: the Charger Manager representing the battery.
+ * @enable: true: enable / false: disable
+ *
+ * Note that Charger Manager keeps the charger enabled regardless of whether
+ * the charger is charging or not (because battery is full or no external
+ * power source exists) except when CM needs to disable chargers forcibly
+ * because of emergency causes; when the battery is overheated or too cold.
+ */
+static int try_charger_enable(struct charger_manager *cm, bool enable)
+{
+	int err = 0, i;
+	struct charger_desc *desc = cm->desc;
+
+	/* Ignore if it's a redundant command */
+	if (enable && cm->charger_enabled)
+		return 0;
+	if (!enable && !cm->charger_enabled)
+		return 0;
+
+	if (enable) {
+		if (cm->emergency_stop)
+			return -EAGAIN;
+		err = regulator_bulk_enable(desc->num_charger_regulators,
+					desc->charger_regulators);
+	} else {
+		/*
+		 * Abnormal battery state - Stop charging forcibly,
+		 * even if charger was enabled at the other places
+		 */
+		err = regulator_bulk_disable(desc->num_charger_regulators,
+					desc->charger_regulators);
+
+		for (i = 0; i < desc->num_charger_regulators; i++) {
+			if (regulator_is_enabled(
+				    desc->charger_regulators[i].consumer)) {
+				regulator_force_disable(
+					desc->charger_regulators[i].consumer);
+				dev_warn(cm->dev,
+					"Disable regulator(%s) forcibly.\n",
+					desc->charger_regulators[i].supply);
+			}
+		}
+	}
+
+	if (!err)
+		cm->charger_enabled = enable;
+
+	return err;
+}
+
+/**
+ * uevent_notify - Let users know something has changed.
+ * @cm: the Charger Manager representing the battery.
+ * @event: the event string.
+ *
+ * If @event is null, it implies that uevent_notify is called
+ * by resume function. When called in the resume function, cm_suspended
+ * should be already reset to false in order to let uevent_notify
+ * notify the recent event during the suspend to users. While
+ * suspended, uevent_notify does not notify users, but tracks
+ * events so that uevent_notify can notify users later after resumed.
+ */
+static void uevent_notify(struct charger_manager *cm, const char *event)
+{
+	static char env_str[UEVENT_BUF_SIZE + 1] = "";
+	static char env_str_save[UEVENT_BUF_SIZE + 1] = "";
+
+	if (cm_suspended) {
+		/* Nothing in suspended-event buffer */
+		if (env_str_save[0] == 0) {
+			if (!strncmp(env_str, event, UEVENT_BUF_SIZE))
+				return; /* status not changed */
+			strncpy(env_str_save, event, UEVENT_BUF_SIZE);
+			return;
+		}
+
+		if (!strncmp(env_str_save, event, UEVENT_BUF_SIZE))
+			return; /* Duplicated. */
+		else
+			strncpy(env_str_save, event, UEVENT_BUF_SIZE);
+
+		return;
+	}
+
+	if (event == NULL) {
+		/* No messages pending */
+		if (!env_str_save[0])
+			return;
+
+		strncpy(env_str, env_str_save, UEVENT_BUF_SIZE);
+		kobject_uevent(&cm->dev->kobj, KOBJ_CHANGE);
+		env_str_save[0] = 0;
+
+		return;
+	}
+
+	/* status not changed */
+	if (!strncmp(env_str, event, UEVENT_BUF_SIZE))
+		return;
+
+	/* save the status and notify the update */
+	strncpy(env_str, event, UEVENT_BUF_SIZE);
+	kobject_uevent(&cm->dev->kobj, KOBJ_CHANGE);
+	/* Never use event as the format string: it may contain '%' */
+	dev_info(cm->dev, "%s\n", event);
+}
+
+/**
+ * _cm_monitor - Monitor the temperature and return true for exceptions.
+ * @cm: the Charger Manager representing the battery.
+ *
+ * Returns true if there is an event to notify for the battery.
+ * (True if the status of "emergency_stop" changes)
+ */
+static bool _cm_monitor(struct charger_manager *cm)
+{
+	struct charger_desc *desc = cm->desc;
+	int temp = desc->temperature_out_of_range(&cm->last_temp_mC);
+
+	dev_dbg(cm->dev, "monitoring (%2.2d.%3.3dC)\n",
+		cm->last_temp_mC / 1000, cm->last_temp_mC % 1000);
+
+	/* It has been stopped or charging already */
+	if (!!temp == !!cm->emergency_stop)
+		return false;
+
+	if (temp) {
+		cm->emergency_stop = temp;
+		if (!try_charger_enable(cm, false)) {
+			if (temp > 0)
+				uevent_notify(cm, "OVERHEAT");
+			else
+				uevent_notify(cm, "COLD");
+		}
+	} else {
+		cm->emergency_stop = 0;
+		if (!try_charger_enable(cm, true))
+			uevent_notify(cm, "CHARGING");
+	}
+
+	return true;
+}
+
+/**
+ * cm_monitor - Monitor every battery.
+ *
+ * Returns true if there is an event to notify from any of the batteries.
+ * (True if the status of "emergency_stop" changes)
+ */
+static bool cm_monitor(void)
+{
+	bool stop = false;
+	struct charger_manager *cm;
+
+	mutex_lock(&cm_list_mtx);
+	/* Call _cm_monitor() first: "stop || ..." would skip later batteries */
+	list_for_each_entry(cm, &cm_list, entry)
+		stop = _cm_monitor(cm) || stop;
+
+	mutex_unlock(&cm_list_mtx);
+
+	return stop;
+}
+
+/**
+ * cm_setup_timer - For in-suspend monitoring setup wakeup alarm
+ *		    for suspend_again.
+ *
+ * Returns true if the alarm is set for Charger Manager to use.
+ * Returns false if
+ *	cm_setup_timer fails to set an alarm,
+ *	cm_setup_timer does not need to set an alarm for Charger Manager,
+ *	or an alarm previously configured is to be used.
+ */
+static bool cm_setup_timer(void)
+{
+	struct charger_manager *cm;
+	unsigned int wakeup_ms = UINT_MAX;
+	bool ret = false;
+
+	mutex_lock(&cm_list_mtx);
+
+	list_for_each_entry(cm, &cm_list, entry) {
+		/* Skip if polling is not required for this CM */
+		if (!is_polling_required(cm) && !cm->emergency_stop)
+			continue;
+		if (cm->desc->polling_interval_ms == 0)
+			continue;
+		CM_MIN_VALID(wakeup_ms, cm->desc->polling_interval_ms);
+	}
+
+	mutex_unlock(&cm_list_mtx);
+
+	if (wakeup_ms < UINT_MAX && wakeup_ms > 0) {
+		pr_info("Charger Manager wakeup timer: %u ms.\n", wakeup_ms);
+		if (rtc_dev) {
+			struct rtc_wkalrm tmp;
+			unsigned long time, now;
+			unsigned long add = DIV_ROUND_UP(wakeup_ms, 1000);
+
+			/*
+			 * Set alarm with the polling interval (wakeup_ms)
+			 * except when rtc_wkalarm_save comes first.
+			 * However, the alarm time should be NOW +
+			 * CM_RTC_SMALL or later.
+			 */
+			tmp.enabled = 1;
+			rtc_read_time(rtc_dev, &tmp.time);
+			rtc_tm_to_time(&tmp.time, &now);
+			if (add < CM_RTC_SMALL)
+				add = CM_RTC_SMALL;
+			time = now + add;
+
+			ret = true;
+
+			if (rtc_wkalarm_save.enabled &&
+			    rtc_wkalarm_save_time &&
+			    rtc_wkalarm_save_time < time) {
+				if (rtc_wkalarm_save_time < now + CM_RTC_SMALL)
+					time = now + CM_RTC_SMALL;
+				else
+					time = rtc_wkalarm_save_time;
+
+				/* The timer is not appointed by CM */
+				ret = false;
+			}
+
+			pr_info("Waking up after %lu secs.\n",
+					time - now);
+
+			rtc_time_to_tm(time, &tmp.time);
+			rtc_set_alarm(rtc_dev, &tmp);
+			cm_suspend_duration_ms += wakeup_ms;
+			return ret;
+		}
+	}
+
+	if (rtc_dev)
+		rtc_set_alarm(rtc_dev, &rtc_wkalarm_save);
+	return false;
+}
+
+/**
+ * cm_suspend_again - Determine whether suspend again or not
+ *
+ * Returns true if the system should be suspended again
+ * Returns false if the system should be woken up
+ */
+bool cm_suspend_again(void)
+{
+	struct charger_manager *cm;
+	bool ret = false;
+
+	if (!g_desc || !g_desc->rtc_only_wakeup || !g_desc->rtc_only_wakeup() ||
+	    !cm_rtc_set)
+		return false;
+
+	if (cm_monitor())
+		goto out;
+
+	ret = true;
+	mutex_lock(&cm_list_mtx);
+	list_for_each_entry(cm, &cm_list, entry) {
+		if (cm->status_save_ext_pwr_inserted != is_ext_pwr_online(cm) ||
+		    cm->status_save_batt != is_batt_present(cm))
+			ret = false;
+	}
+	mutex_unlock(&cm_list_mtx);
+
+	cm_rtc_set = cm_setup_timer();
+out:
+	/* It's about the time when the non-CM appointed timer goes off */
+	if (rtc_wkalarm_save.enabled) {
+		unsigned long now;
+		struct rtc_time tmp;
+
+		rtc_read_time(rtc_dev, &tmp);
+		rtc_tm_to_time(&tmp, &now);
+
+		if (rtc_wkalarm_save_time &&
+		    now + CM_RTC_SMALL >= rtc_wkalarm_save_time)
+			return false;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cm_suspend_again);
+
+/**
+ * setup_charger_manager - initialize charger_global_desc data
+ * @gd: pointer to instance of charger_global_desc
+ */
+int setup_charger_manager(struct charger_global_desc *gd)
+{
+	if (!gd)
+		return -EINVAL;
+	/* Drop any previously registered RTC/descriptor before re-setup */
+	if (rtc_dev)
+		rtc_class_close(rtc_dev);
+	rtc_dev = NULL;
+	g_desc = NULL;
+
+	if (!gd->rtc_only_wakeup) {
+		pr_err("The callback rtc_only_wakeup is not given.\n");
+		return -EINVAL;
+	}
+
+	if (gd->rtc_name) {
+		rtc_dev = rtc_class_open(gd->rtc_name);
+		if (IS_ERR_OR_NULL(rtc_dev)) {
+			rtc_dev = NULL;
+			/* Retry at probe. RTC may be not registered yet */
+		}
+	} else {
+		pr_warn("No wakeup timer is given for charger manager. "
+			"In-suspend monitoring won't work.\n");
+	}
+	/* Success: publish the descriptor for probe and cm_suspend_again */
+	g_desc = gd;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(setup_charger_manager);
+
+static int charger_manager_probe(struct platform_device *pdev)
+{
+	struct charger_desc *desc = dev_get_platdata(&pdev->dev);
+	struct charger_manager *cm;
+	int ret = 0, i = 0;
+
+	if (g_desc && !rtc_dev && g_desc->rtc_name) {
+		rtc_dev = rtc_class_open(g_desc->rtc_name);
+		if (IS_ERR_OR_NULL(rtc_dev)) {
+			rtc_dev = NULL;
+			dev_err(&pdev->dev, "Cannot get RTC %s.\n",
+				g_desc->rtc_name);
+			ret = -ENODEV;
+			goto err_alloc;
+		}
+	}
+
+	if (!desc) {
+		dev_err(&pdev->dev, "No platform data (desc) found.\n");
+		ret = -ENODEV;
+		goto err_alloc;
+	}
+
+	cm = kzalloc(sizeof(struct charger_manager), GFP_KERNEL);
+	if (!cm) {
+		dev_err(&pdev->dev, "Cannot allocate memory.\n");
+		ret = -ENOMEM;
+		goto err_alloc;
+	}
+
+	/* Basic Values. Unspecified are Null or 0 */
+	cm->dev = &pdev->dev;
+	cm->desc = kzalloc(sizeof(struct charger_desc), GFP_KERNEL);
+	if (!cm->desc) {
+		dev_err(&pdev->dev, "Cannot allocate memory.\n");
+		ret = -ENOMEM;
+		goto err_alloc_desc;
+	}
+	memcpy(cm->desc, desc, sizeof(struct charger_desc));
+	cm->last_temp_mC = INT_MIN; /* denotes "unmeasured, yet" */
+
+	if (!desc->charger_regulators || desc->num_charger_regulators < 1) {
+		ret = -EINVAL;
+		dev_err(&pdev->dev, "charger_regulators undefined.\n");
+		goto err_no_charger;
+	}
+
+	if (!desc->psy_charger_stat || !desc->psy_charger_stat[0]) {
+		dev_err(&pdev->dev, "No power supply defined.\n");
+		ret = -EINVAL;
+		goto err_no_charger_stat;
+	}
+
+	/* Counting index only */
+	while (desc->psy_charger_stat[i])
+		i++;
+
+	cm->charger_stat = kzalloc(sizeof(struct power_supply *) * (i + 1),
+				   GFP_KERNEL);
+	if (!cm->charger_stat) {
+		ret = -ENOMEM;
+		goto err_no_charger_stat;
+	}
+
+	for (i = 0; desc->psy_charger_stat[i]; i++) {
+		cm->charger_stat[i] = power_supply_get_by_name(
+					desc->psy_charger_stat[i]);
+		if (!cm->charger_stat[i]) {
+			dev_err(&pdev->dev, "Cannot find power supply "
+					"\"%s\"\n",
+					desc->psy_charger_stat[i]);
+			ret = -ENODEV;
+			goto err_chg_stat;
+		}
+	}
+
+	cm->fuel_gauge = power_supply_get_by_name(desc->psy_fuel_gauge);
+	if (!cm->fuel_gauge) {
+		dev_err(&pdev->dev, "Cannot find power supply \"%s\"\n",
+				desc->psy_fuel_gauge);
+		ret = -ENODEV;
+		goto err_chg_stat;
+	}
+
+	if (desc->polling_interval_ms == 0 ||
+	    msecs_to_jiffies(desc->polling_interval_ms) <= CM_JIFFIES_SMALL) {
+		dev_err(&pdev->dev, "polling_interval_ms is too small\n");
+		ret = -EINVAL;
+		goto err_chg_stat;
+	}
+
+	if (!desc->temperature_out_of_range) {
+		dev_err(&pdev->dev, "there is no temperature_out_of_range\n");
+		ret = -EINVAL;
+		goto err_chg_stat;
+	}
+
+	platform_set_drvdata(pdev, cm);
+
+	ret = regulator_bulk_get(&pdev->dev, desc->num_charger_regulators,
+				 desc->charger_regulators);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot get charger regulators.\n");
+		goto err_chg_stat;
+	}
+
+	ret = try_charger_enable(cm, true);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot enable charger regulators\n");
+		goto err_chg_enable;
+	}
+
+	/* Add to the list */
+	mutex_lock(&cm_list_mtx);
+	list_add(&cm->entry, &cm_list);
+	mutex_unlock(&cm_list_mtx);
+
+	return 0;
+
+err_chg_enable:
+	if (desc->charger_regulators)
+		regulator_bulk_free(desc->num_charger_regulators,
+					desc->charger_regulators);
+err_chg_stat:
+	kfree(cm->charger_stat);
+err_no_charger_stat:
+err_no_charger:
+	kfree(cm->desc);
+err_alloc_desc:
+	kfree(cm);
+err_alloc:
+	return ret;
+}
+
+static int __devexit charger_manager_remove(struct platform_device *pdev)
+{
+	struct charger_manager *cm = platform_get_drvdata(pdev);
+	struct charger_desc *desc = cm->desc;
+
+	/* Remove from the list */
+	mutex_lock(&cm_list_mtx);
+	list_del(&cm->entry);
+	mutex_unlock(&cm_list_mtx);
+
+	if (desc->charger_regulators)
+		regulator_bulk_free(desc->num_charger_regulators,
+					desc->charger_regulators);
+	kfree(cm->charger_stat);
+	kfree(cm->desc);
+	kfree(cm);
+
+	return 0;
+}
+
+static const struct platform_device_id charger_manager_id[] = {
+	{ "charger-manager", 0 }, /* matches charger_manager_driver's name */
+	{ }, /* sentinel terminating the table */
+};
+
+static int cm_suspend_prepare(struct device *dev)
+{
+	struct platform_device *pdev = container_of(dev, struct platform_device,
+						    dev);
+	struct charger_manager *cm = platform_get_drvdata(pdev);
+
+	if (!cm_suspended) {
+		if (rtc_dev) {
+			struct rtc_time tmp;
+			unsigned long now;
+
+			rtc_read_alarm(rtc_dev, &rtc_wkalarm_save);
+			rtc_read_time(rtc_dev, &tmp);
+
+			if (rtc_wkalarm_save.enabled) {
+				rtc_tm_to_time(&rtc_wkalarm_save.time,
+					       &rtc_wkalarm_save_time);
+				rtc_tm_to_time(&tmp, &now);
+				if (now > rtc_wkalarm_save_time)
+					rtc_wkalarm_save_time = 0;
+			} else {
+				rtc_wkalarm_save_time = 0;
+			}
+		}
+		cm_suspended = true;
+	}
+
+	cm->status_save_ext_pwr_inserted = is_ext_pwr_online(cm);
+	cm->status_save_batt = is_batt_present(cm);
+
+	if (!cm_rtc_set) {
+		cm_suspend_duration_ms = 0;
+		cm_rtc_set = cm_setup_timer();
+	}
+
+	return 0;
+}
+
+static void cm_suspend_complete(struct device *dev)
+{
+	struct platform_device *pdev = container_of(dev, struct platform_device,
+						    dev);
+	struct charger_manager *cm = platform_get_drvdata(pdev);
+
+	if (cm_suspended) {
+		if (rtc_dev) {
+			struct rtc_wkalrm tmp;
+
+			rtc_read_alarm(rtc_dev, &tmp);
+			rtc_wkalarm_save.pending = tmp.pending;
+			rtc_set_alarm(rtc_dev, &rtc_wkalarm_save);
+		}
+		cm_suspended = false;
+		cm_rtc_set = false;
+	}
+
+	uevent_notify(cm, NULL);
+}
+
+static const struct dev_pm_ops charger_manager_pm = {
+	.prepare	= cm_suspend_prepare,
+	.complete	= cm_suspend_complete,
+};
+
+static struct platform_driver charger_manager_driver = {
+	.driver = {
+		.name = "charger-manager",
+		.owner = THIS_MODULE,
+		.pm = &charger_manager_pm,
+	},
+	.probe = charger_manager_probe,
+	.remove = __devexit_p(charger_manager_remove),
+	.id_table = charger_manager_id,
+};
+
+static int __init charger_manager_init(void)
+{
+	return platform_driver_register(&charger_manager_driver);
+}
+late_initcall(charger_manager_init);
+
+static void __exit charger_manager_cleanup(void)
+{
+	platform_driver_unregister(&charger_manager_driver);
+}
+module_exit(charger_manager_cleanup);
+
+MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
+MODULE_DESCRIPTION("Charger Manager");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("charger-manager");
diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h
new file mode 100644
index 000000000000..102c5b3f3325
--- /dev/null
+++ b/include/linux/power/charger-manager.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2011 Samsung Electronics Co., Ltd.
+ * MyungJoo.Ham <myungjoo.ham@samsung.com>
+ *
+ * Charger Manager.
+ * This framework makes it possible to control multiple chargers and to
+ * monitor charging even in the context of suspend-to-RAM with
+ * an interface combining the chargers.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+**/
+
+#ifndef _CHARGER_MANAGER_H
+#define _CHARGER_MANAGER_H
+
+#include <linux/power_supply.h>
+
+enum data_source {
+	CM_FUEL_GAUGE,
+	CM_CHARGER_STAT,
+};
+
+enum polling_modes {
+	CM_POLL_DISABLE = 0,
+	CM_POLL_ALWAYS,
+	CM_POLL_EXTERNAL_POWER_ONLY,
+	CM_POLL_CHARGING_ONLY,
+};
+
+/**
+ * struct charger_global_desc
+ * @rtc_name: the name of RTC used to wake up the system from suspend.
+ * @rtc_only_wakeup:
+ *	If the system is woken up by wakeup-sources other than the RTC or
+ *	callbacks, Charger Manager should recognize it by
+ *	rtc_only_wakeup() returning false.
+ *	If the RTC given to CM is the only wakeup reason,
+ *	rtc_only_wakeup should return true.
+ */
+struct charger_global_desc {
+	char *rtc_name;
+
+	bool (*rtc_only_wakeup)(void);
+};
+
+/**
+ * struct charger_desc
+ * @polling_mode:
+ *	Determine which polling mode will be used
+ * @polling_interval_ms: interval in millisecond at which
+ *	charger manager will monitor battery health
+ * @battery_present:
+ *	Specify where information for existence of battery can be obtained
+ * @psy_charger_stat: the names of power-supply for chargers
+ * @num_charger_regulators: the number of entries in charger_regulators
+ * @charger_regulators: array of regulator_bulk_data for chargers
+ * @psy_fuel_gauge: the name of power-supply for fuel gauge
+ * @temperature_out_of_range:
+ *	Determine whether the status is overheat or cold or normal.
+ *	return_value > 0: overheat
+ *	return_value == 0: normal
+ *	return_value < 0: cold
+ */
+struct charger_desc {
+	enum polling_modes polling_mode;
+	unsigned int polling_interval_ms;
+
+	enum data_source battery_present;
+
+	char **psy_charger_stat;
+
+	int num_charger_regulators;
+	struct regulator_bulk_data *charger_regulators;
+
+	char *psy_fuel_gauge;
+
+	int (*temperature_out_of_range)(int *mC);
+};
+
+#define PSY_NAME_MAX	30
+
+/**
+ * struct charger_manager
+ * @entry: entry for list
+ * @dev: device pointer
+ * @desc: instance of charger_desc
+ * @fuel_gauge: power_supply for fuel gauge
+ * @charger_stat: array of power_supply for chargers
+ * @charger_enabled: the state of charger
+ * @emergency_stop:
+ *	When setting true, stop charging
+ * @last_temp_mC: the measured temperature in milli-Celsius
+ * @status_save_ext_pwr_inserted:
+ *	saved status of external power before entering suspend-to-RAM
+ * @status_save_batt:
+ *	saved status of battery before entering suspend-to-RAM
+ */
+struct charger_manager {
+	struct list_head entry;
+	struct device *dev;
+	struct charger_desc *desc;
+
+	struct power_supply *fuel_gauge;
+	struct power_supply **charger_stat;
+
+	bool charger_enabled;
+
+	int emergency_stop;
+	int last_temp_mC;
+
+	bool status_save_ext_pwr_inserted;
+	bool status_save_batt;
+};
+
+#ifdef CONFIG_CHARGER_MANAGER
+extern int setup_charger_manager(struct charger_global_desc *gd);
+extern bool cm_suspend_again(void);
+#else
+static int __maybe_unused setup_charger_manager(struct charger_global_desc *gd)
+{ return 0; } /* no-op stub: same int return type as the real function */
+
+static bool __maybe_unused cm_suspend_again(void)
+{
+	return false;
+}
+#endif
+
+#endif /* _CHARGER_MANAGER_H */
-- 
cgit v1.2.3


From ad3d13eee78ec44194bf919a37e2f711e53cbdf0 Mon Sep 17 00:00:00 2001
From: Donggeun Kim <dg77.kim@samsung.com>
Date: Tue, 27 Dec 2011 18:47:49 +0900
Subject: power_supply: Charger-Manager: Add properties for power-supply-class

Charger Manager provides power-supply-class aggregating
information from multiple chargers and a fuel-gauge.

Signed-off-by: Donggeun Kim <dg77.kim@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 Documentation/power/charger-manager.txt |  14 ++
 drivers/power/charger-manager.c         | 295 +++++++++++++++++++++++++++++++-
 include/linux/power/charger-manager.h   |  17 ++
 3 files changed, 325 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/power/charger-manager.txt b/Documentation/power/charger-manager.txt
index 081489f3db25..fdcca991df30 100644
--- a/Documentation/power/charger-manager.txt
+++ b/Documentation/power/charger-manager.txt
@@ -98,6 +98,11 @@ battery), an instance of Charger Manager is attached to it.
 
 struct charger_desc {
 
+char *psy_name;
+	: The power-supply-class name of the battery. Default is
+	"battery" if psy_name is NULL. Users can access the psy entries
+	at "/sys/class/power_supply/[psy_name]/".
+
 enum polling_modes polling_mode;
 	: CM_POLL_DISABLE: do not poll this battery.
 	  CM_POLL_ALWAYS: always poll this battery.
@@ -106,6 +111,12 @@ enum polling_modes polling_mode;
 	  CM_POLL_CHARGING_ONLY: poll this battery if and only if the
 				 battery is being charged.
 
+unsigned int fullbatt_uV;
+	: If specified with a non-zero value, Charger Manager assumes
+	that the battery is full (capacity = 100) if the battery is not being
+	charged and the battery voltage is equal to or greater than
+	fullbatt_uV.
+
 unsigned int polling_interval_ms;
 	: Required polling interval in ms. Charger Manager will poll
 	this battery every polling_interval_ms or more frequently.
@@ -131,10 +142,13 @@ char *psy_fuel_gauge;
 	: Power-supply-class name of the fuel gauge.
 
 int (*temperature_out_of_range)(int *mC);
+bool measure_battery_temp;
 	: This callback returns 0 if the temperature is safe for charging,
 	a positive number if it is too hot to charge, and a negative number
 	if it is too cold to charge. With the variable mC, the callback returns
 	the temperature in 1/1000 of centigrade.
+	The source of temperature can be battery or ambient one according to
+	the value of measure_battery_temp.
 };
 
 5. Other Considerations
diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c
index 727a259ea46c..0378d019efae 100644
--- a/drivers/power/charger-manager.c
+++ b/drivers/power/charger-manager.c
@@ -121,6 +121,32 @@ static bool is_ext_pwr_online(struct charger_manager *cm)
 	return online;
 }
 
+/**
+ * get_batt_uV - Get the voltage level of the battery
+ * @cm: the Charger Manager representing the battery.
+ * @uV: the voltage level returned.
+ *
+ * Returns 0 if there is no error.
+ * Returns a negative value on error.
+ */
+static int get_batt_uV(struct charger_manager *cm, int *uV)
+{
+	union power_supply_propval val;
+	int ret;
+
+	if (cm->fuel_gauge)
+		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+				POWER_SUPPLY_PROP_VOLTAGE_NOW, &val);
+	else
+		return -ENODEV;
+
+	if (ret)
+		return ret;
+
+	*uV = val.intval;
+	return 0;
+}
+
 /**
  * is_charging - Returns true if the battery is being charged.
  * @cm: the Charger Manager representing the battery.
@@ -369,6 +395,208 @@ static bool cm_monitor(void)
 	return stop;
 }
 
+/* power_supply get_property callback aggregating chargers and fuel gauge */
+static int charger_get_property(struct power_supply *psy,
+		enum power_supply_property psp, union power_supply_propval *val)
+{
+	struct charger_manager *cm = container_of(psy,
+			struct charger_manager, charger_psy);
+	struct charger_desc *desc = cm->desc;
+	int ret = 0, uV;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		if (is_charging(cm))
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+		else if (is_ext_pwr_online(cm))
+			val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		else
+			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		break;
+	case POWER_SUPPLY_PROP_HEALTH:
+		if (cm->emergency_stop > 0)
+			val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
+		else if (cm->emergency_stop < 0)
+			val->intval = POWER_SUPPLY_HEALTH_COLD;
+		else
+			val->intval = POWER_SUPPLY_HEALTH_GOOD;
+		break;
+	case POWER_SUPPLY_PROP_PRESENT:
+		if (is_batt_present(cm))
+			val->intval = 1;
+		else
+			val->intval = 0;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		/* Write into val directly; it stays untouched on error */
+		ret = get_batt_uV(cm, &val->intval);
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+				POWER_SUPPLY_PROP_CURRENT_NOW, val);
+		break;
+	case POWER_SUPPLY_PROP_TEMP:
+		/* in tenth of centigrade */
+		if (cm->last_temp_mC == INT_MIN)
+			desc->temperature_out_of_range(&cm->last_temp_mC);
+		val->intval = cm->last_temp_mC / 100;
+		if (!desc->measure_battery_temp)
+			ret = -ENODEV;
+		break;
+	case POWER_SUPPLY_PROP_TEMP_AMBIENT:
+		/* in tenth of centigrade */
+		if (cm->last_temp_mC == INT_MIN)
+			desc->temperature_out_of_range(&cm->last_temp_mC);
+		val->intval = cm->last_temp_mC / 100;
+		if (desc->measure_battery_temp)
+			ret = -ENODEV;
+		break;
+	case POWER_SUPPLY_PROP_CAPACITY:
+		if (!cm->fuel_gauge) {
+			ret = -ENODEV;
+			break;
+		}
+
+		if (!is_batt_present(cm)) {
+			/* There is no battery. Assume 100% */
+			val->intval = 100;
+			break;
+		}
+
+		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+					POWER_SUPPLY_PROP_CAPACITY, val);
+		if (ret)
+			break;
+
+		if (val->intval > 100) {
+			val->intval = 100;
+			break;
+		}
+		if (val->intval < 0)
+			val->intval = 0;
+
+		/* Do not adjust SOC when charging: voltage is overrated */
+		if (is_charging(cm))
+			break;
+
+		/*
+		 * If the capacity value is inconsistent, calibrate it base on
+		 * the battery voltage values and the thresholds given as desc
+		 */
+		ret = get_batt_uV(cm, &uV);
+		if (ret) {
+			/* Voltage information not available. No calibration */
+			ret = 0;
+			break;
+		}
+
+		if (desc->fullbatt_uV > 0 && uV >= desc->fullbatt_uV &&
+		    !is_charging(cm)) {
+			val->intval = 100;
+			break;
+		}
+
+		break;
+	case POWER_SUPPLY_PROP_ONLINE:
+		if (is_ext_pwr_online(cm))
+			val->intval = 1;
+		else
+			val->intval = 0;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+		if (cm->fuel_gauge) {
+			if (cm->fuel_gauge->get_property(cm->fuel_gauge,
+			    POWER_SUPPLY_PROP_CHARGE_FULL, val) == 0)
+				break;
+		}
+
+		if (is_ext_pwr_online(cm)) {
+			/* Not full if it's charging. */
+			if (is_charging(cm)) {
+				val->intval = 0;
+				break;
+			}
+			/*
+			 * Full if it's powered but not charging and
+			 * not forced stop by emergency
+			 */
+			if (!cm->emergency_stop) {
+				val->intval = 1;
+				break;
+			}
+		}
+
+		/* Full if it's over the fullbatt voltage */
+		ret = get_batt_uV(cm, &uV);
+		if (!ret && desc->fullbatt_uV > 0 && uV >= desc->fullbatt_uV &&
+		    !is_charging(cm)) {
+			val->intval = 1;
+			break;
+		}
+
+		/* Full if the cap is 100 */
+		if (cm->fuel_gauge) {
+			ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+					POWER_SUPPLY_PROP_CAPACITY, val);
+			if (!ret && val->intval >= 100 && !is_charging(cm)) {
+				val->intval = 1;
+				break;
+			}
+		}
+
+		val->intval = 0;
+		ret = 0;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+		if (is_charging(cm)) {
+			ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
+						POWER_SUPPLY_PROP_CHARGE_NOW,
+						val);
+			if (ret) {
+				val->intval = 1;
+				ret = 0;
+			} else {
+				/* If CHARGE_NOW is supplied, use it */
+				val->intval = (val->intval > 0) ?
+						val->intval : 1;
+			}
+		} else {
+			val->intval = 0;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+	return ret;
+}
+
+#define NUM_CHARGER_PSY_OPTIONAL	(4)
+static enum power_supply_property default_charger_props[] = {
+	/* Guaranteed to provide */
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_CHARGE_FULL,
+	/*
+	 * Optional properties are:
+	 * POWER_SUPPLY_PROP_CHARGE_NOW,
+	 * POWER_SUPPLY_PROP_CURRENT_NOW,
+	 * POWER_SUPPLY_PROP_TEMP, and
+	 * POWER_SUPPLY_PROP_TEMP_AMBIENT,
+	 */
+};
+
+static struct power_supply psy_default = {
+	.name = "battery",
+	.type = POWER_SUPPLY_TYPE_BATTERY,
+	.properties = default_charger_props,
+	.num_properties = ARRAY_SIZE(default_charger_props),
+	.get_property = charger_get_property,
+};
+
 /**
  * cm_setup_timer - For in-suspend monitoring setup wakeup alarm
  *		    for suspend_again.
@@ -532,6 +760,7 @@ static int charger_manager_probe(struct platform_device *pdev)
 	struct charger_desc *desc = dev_get_platdata(&pdev->dev);
 	struct charger_manager *cm;
 	int ret = 0, i = 0;
+	union power_supply_propval val;
 
 	if (g_desc && !rtc_dev && g_desc->rtc_name) {
 		rtc_dev = rtc_class_open(g_desc->rtc_name);
@@ -626,11 +855,68 @@ static int charger_manager_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, cm);
 
+	memcpy(&cm->charger_psy, &psy_default,
+				sizeof(psy_default));
+	if (!desc->psy_name) {
+		strncpy(cm->psy_name_buf, psy_default.name,
+				PSY_NAME_MAX);
+	} else {
+		strncpy(cm->psy_name_buf, desc->psy_name, PSY_NAME_MAX);
+	}
+	cm->charger_psy.name = cm->psy_name_buf;
+
+	/* Allocate for psy properties because they may vary */
+	cm->charger_psy.properties = kzalloc(sizeof(enum power_supply_property)
+				* (ARRAY_SIZE(default_charger_props) +
+				NUM_CHARGER_PSY_OPTIONAL),
+				GFP_KERNEL);
+	if (!cm->charger_psy.properties) {
+		dev_err(&pdev->dev, "Cannot allocate for psy properties.\n");
+		ret = -ENOMEM;
+		goto err_chg_stat;
+	}
+	memcpy(cm->charger_psy.properties, default_charger_props,
+		sizeof(enum power_supply_property) *
+		ARRAY_SIZE(default_charger_props));
+	cm->charger_psy.num_properties = psy_default.num_properties;
+
+	/* Find which optional psy-properties are available */
+	if (!cm->fuel_gauge->get_property(cm->fuel_gauge,
+					  POWER_SUPPLY_PROP_CHARGE_NOW, &val)) {
+		cm->charger_psy.properties[cm->charger_psy.num_properties] =
+				POWER_SUPPLY_PROP_CHARGE_NOW;
+		cm->charger_psy.num_properties++;
+	}
+	if (!cm->fuel_gauge->get_property(cm->fuel_gauge,
+					  POWER_SUPPLY_PROP_CURRENT_NOW,
+					  &val)) {
+		cm->charger_psy.properties[cm->charger_psy.num_properties] =
+				POWER_SUPPLY_PROP_CURRENT_NOW;
+		cm->charger_psy.num_properties++;
+	}
+	if (!desc->measure_battery_temp) {
+		cm->charger_psy.properties[cm->charger_psy.num_properties] =
+				POWER_SUPPLY_PROP_TEMP_AMBIENT;
+		cm->charger_psy.num_properties++;
+	}
+	if (desc->measure_battery_temp) {
+		cm->charger_psy.properties[cm->charger_psy.num_properties] =
+				POWER_SUPPLY_PROP_TEMP;
+		cm->charger_psy.num_properties++;
+	}
+
+	ret = power_supply_register(NULL, &cm->charger_psy);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot register charger-manager with"
+				" name \"%s\".\n", cm->charger_psy.name);
+		goto err_register;
+	}
+
 	ret = regulator_bulk_get(&pdev->dev, desc->num_charger_regulators,
 				 desc->charger_regulators);
 	if (ret) {
 		dev_err(&pdev->dev, "Cannot get charger regulators.\n");
-		goto err_chg_stat;
+		goto err_bulk_get;
 	}
 
 	ret = try_charger_enable(cm, true);
@@ -650,6 +936,10 @@ err_chg_enable:
 	if (desc->charger_regulators)
 		regulator_bulk_free(desc->num_charger_regulators,
 					desc->charger_regulators);
+err_bulk_get:
+	power_supply_unregister(&cm->charger_psy);
+err_register:
+	kfree(cm->charger_psy.properties);
 err_chg_stat:
 	kfree(cm->charger_stat);
 err_no_charger_stat:
@@ -674,6 +964,9 @@ static int __devexit charger_manager_remove(struct platform_device *pdev)
 	if (desc->charger_regulators)
 		regulator_bulk_free(desc->num_charger_regulators,
 					desc->charger_regulators);
+
+	power_supply_unregister(&cm->charger_psy);
+	kfree(cm->charger_psy.properties);
 	kfree(cm->charger_stat);
 	kfree(cm->desc);
 	kfree(cm);
diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h
index 102c5b3f3325..4f75e531c112 100644
--- a/include/linux/power/charger-manager.h
+++ b/include/linux/power/charger-manager.h
@@ -47,8 +47,12 @@ struct charger_global_desc {
 
 /**
  * struct charger_desc
+ * @psy_name: the name of power-supply-class for charger manager
  * @polling_mode:
  *	Determine which polling mode will be used
+ * @fullbatt_uV: voltage in microvolts
+ *	If the battery is not being charged and VBATT >= fullbatt_uV,
+ *	the battery is assumed to be full.
  * @polling_interval_ms: interval in millisecond at which
  *	charger manager will monitor battery health
  * @battery_present:
@@ -62,11 +66,18 @@ struct charger_global_desc {
  *	return_value > 0: overheat
  *	return_value == 0: normal
  *	return_value < 0: cold
+ * @measure_battery_temp:
+ *	true: measure battery temperature
+ *	false: measure ambient temperature
  */
 struct charger_desc {
+	char *psy_name;
+
 	enum polling_modes polling_mode;
 	unsigned int polling_interval_ms;
 
+	unsigned int fullbatt_uV;
+
 	enum data_source battery_present;
 
 	char **psy_charger_stat;
@@ -77,6 +88,7 @@ struct charger_desc {
 	char *psy_fuel_gauge;
 
 	int (*temperature_out_of_range)(int *mC);
+	bool measure_battery_temp;
 };
 
 #define PSY_NAME_MAX	30
@@ -92,6 +104,8 @@ struct charger_desc {
  * @emergency_stop:
  *	When setting true, stop charging
  * @last_temp_mC: the measured temperature in milli-Celsius
+ * @psy_name_buf: the name of power-supply-class for charger manager
+ * @charger_psy: power_supply for charger manager
  * @status_save_ext_pwr_inserted:
  *	saved status of external power before entering suspend-to-RAM
  * @status_save_batt:
@@ -110,6 +124,9 @@ struct charger_manager {
 	int emergency_stop;
 	int last_temp_mC;
 
+	char psy_name_buf[PSY_NAME_MAX + 1];
+	struct power_supply charger_psy;
+
 	bool status_save_ext_pwr_inserted;
 	bool status_save_batt;
 };
-- 
cgit v1.2.3


From 9b8872273af6983b246252a6508fa7cf34c69d6e Mon Sep 17 00:00:00 2001
From: "Kim, Milo" <Milo.Kim@ti.com>
Date: Wed, 30 Nov 2011 23:08:33 -0800
Subject: power_supply: Add "unknown" in power supply type

Add "unknown" as the default (zero) value of the power supply type.
A power supply whose type has not been set explicitly is now
displayed as "Unknown" instead of "Battery".

Signed-off-by: Milo(Woogyom) Kim <milo.kim@ti.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/power_supply_sysfs.c | 2 +-
 include/linux/power_supply.h       | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 58cc4906d216..939e2e432553 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -42,7 +42,7 @@ static ssize_t power_supply_show_property(struct device *dev,
 					  struct device_attribute *attr,
 					  char *buf) {
 	static char *type_text[] = {
-		"Battery", "UPS", "Mains", "USB",
+		"Unknown", "Battery", "UPS", "Mains", "USB",
 		"USB_DCP", "USB_CDP", "USB_ACA"
 	};
 	static char *status_text[] = {
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 204c18dfdc9e..9c83e04f6a43 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -123,7 +123,8 @@ enum power_supply_property {
 };
 
 enum power_supply_type {
-	POWER_SUPPLY_TYPE_BATTERY = 0,
+	POWER_SUPPLY_TYPE_UNKNOWN = 0,
+	POWER_SUPPLY_TYPE_BATTERY,
 	POWER_SUPPLY_TYPE_UPS,
 	POWER_SUPPLY_TYPE_MAINS,
 	POWER_SUPPLY_TYPE_USB,		/* Standard Downstream Port */
-- 
cgit v1.2.3


From 620b2736696743fc785b2fc63dbc0fe69cbfe3a7 Mon Sep 17 00:00:00 2001
From: "Kim, Milo" <Milo.Kim@nsc.com>
Date: Wed, 7 Sep 2011 01:56:14 -0700
Subject: lp8727_charger: Add header file

Oops, forgot to 'git add' it. [AV]

Signed-off-by: Woogyom Kim <milo.kim@ti.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 include/linux/lp8727.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100755 include/linux/lp8727.h

(limited to 'include/linux')

diff --git a/include/linux/lp8727.h b/include/linux/lp8727.h
new file mode 100755
index 000000000000..3ec558959a11
--- /dev/null
+++ b/include/linux/lp8727.h
@@ -0,0 +1,54 @@
+/*
+ * lp8727.h - Driver for LP8727 Micro/Mini USB IC with intergrated charger
+ *
+ *			Copyright (C) 2011 National Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _LP8727_H
+#define _LP8727_H
+
+enum lp8727_eoc_level {
+	EOC_5P,
+	EOC_10P,
+	EOC_16P,
+	EOC_20P,
+	EOC_25P,
+	EOC_33P,
+	EOC_50P,
+};
+
+enum lp8727_ichg {
+	ICHG_90mA,
+	ICHG_100mA,
+	ICHG_400mA,
+	ICHG_450mA,
+	ICHG_500mA,
+	ICHG_600mA,
+	ICHG_700mA,
+	ICHG_800mA,
+	ICHG_900mA,
+	ICHG_1000mA,
+};
+
+struct lp8727_chg_param {
+	/* end of charge level setting */
+	enum lp8727_eoc_level eoc_level;
+	/* charging current */
+	enum lp8727_ichg ichg;
+};
+
+struct lp8727_platform_data {
+	u8(*get_batt_present) (void);
+	u16(*get_batt_level) (void);
+	u8(*get_batt_capacity) (void);
+	u8(*get_batt_temp) (void);
+	struct lp8727_chg_param ac;
+	struct lp8727_chg_param usb;
+};
+
+#endif
-- 
cgit v1.2.3


From e57b432d0c91a4c472b2826b21524bdbbf2688d1 Mon Sep 17 00:00:00 2001
From: "Milo(Woogyom) Kim" <milo.kim@ti.com>
Date: Wed, 4 Jan 2012 16:23:11 +0400
Subject: lp8727_charger: Some minor fixes for the header

Pointer coding style changes
: add space between return type and function pointer
  ex) u8(*get_batt_present) (void)
   -> u8 (*get_batt_present) (void)

Signed-off-by: Woogyom Kim <milo.kim@ti.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 include/linux/lp8727.h | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lp8727.h b/include/linux/lp8727.h
index 3ec558959a11..d21fa2865bf4 100755
--- a/include/linux/lp8727.h
+++ b/include/linux/lp8727.h
@@ -1,12 +1,9 @@
 /*
- * lp8727.h - Driver for LP8727 Micro/Mini USB IC with intergrated charger
- *
  *			Copyright (C) 2011 National Semiconductor
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
  */
 
 #ifndef _LP8727_H
@@ -43,10 +40,10 @@ struct lp8727_chg_param {
 };
 
 struct lp8727_platform_data {
-	u8(*get_batt_present) (void);
-	u16(*get_batt_level) (void);
-	u8(*get_batt_capacity) (void);
-	u8(*get_batt_temp) (void);
+	u8 (*get_batt_present)(void);
+	u16 (*get_batt_level)(void);
+	u8 (*get_batt_capacity)(void);
+	u8 (*get_batt_temp)(void);
 	struct lp8727_chg_param ac;
 	struct lp8727_chg_param usb;
 };
-- 
cgit v1.2.3


From 60f98d1839376d30e13f3e452dce2433fad3060e Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Wed, 2 Nov 2011 14:30:58 -0500
Subject: dlm: add recovery callbacks

These new callbacks notify the dlm user about lock recovery.
GFS2, and possibly others, need to be aware of when the dlm
will be doing lock recovery for a failed lockspace member.

In the past, this coordination has been done between dlm and
file system daemons in userspace, which then direct their
kernel counterparts.  These callbacks allow the same
coordination directly, and more simply.

Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/config.c       | 130 ++++++++++++++++++---------------
 fs/dlm/config.h       |  17 ++++-
 fs/dlm/dlm_internal.h |  21 ++----
 fs/dlm/lockspace.c    |  43 +++++++++--
 fs/dlm/member.c       | 197 ++++++++++++++++++++++++++++++++------------------
 fs/dlm/member.h       |   3 +-
 fs/dlm/recoverd.c     |  10 +--
 fs/dlm/user.c         |   5 +-
 fs/gfs2/lock_dlm.c    |   4 +-
 fs/ocfs2/stack_user.c |   4 +-
 include/linux/dlm.h   |  71 ++++++++++++++++--
 11 files changed, 333 insertions(+), 172 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 6cf72fcc0d0c..e7e327d43fa5 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/in.h>
 #include <linux/in6.h>
+#include <linux/dlmconstants.h>
 #include <net/ipv6.h>
 #include <net/sock.h>
 
@@ -36,6 +37,7 @@
 static struct config_group *space_list;
 static struct config_group *comm_list;
 static struct dlm_comm *local_comm;
+static uint32_t dlm_comm_count;
 
 struct dlm_clusters;
 struct dlm_cluster;
@@ -103,6 +105,8 @@ struct dlm_cluster {
 	unsigned int cl_timewarn_cs;
 	unsigned int cl_waitwarn_us;
 	unsigned int cl_new_rsb_count;
+	unsigned int cl_recover_callbacks;
+	char cl_cluster_name[DLM_LOCKSPACE_LEN];
 };
 
 enum {
@@ -118,6 +122,8 @@ enum {
 	CLUSTER_ATTR_TIMEWARN_CS,
 	CLUSTER_ATTR_WAITWARN_US,
 	CLUSTER_ATTR_NEW_RSB_COUNT,
+	CLUSTER_ATTR_RECOVER_CALLBACKS,
+	CLUSTER_ATTR_CLUSTER_NAME,
 };
 
 struct cluster_attribute {
@@ -126,6 +132,27 @@ struct cluster_attribute {
 	ssize_t (*store)(struct dlm_cluster *, const char *, size_t);
 };
 
+static ssize_t cluster_cluster_name_read(struct dlm_cluster *cl, char *buf)
+{
+	return sprintf(buf, "%s\n", cl->cl_cluster_name);
+}
+
+static ssize_t cluster_cluster_name_write(struct dlm_cluster *cl,
+					  const char *buf, size_t len)
+{
+	strncpy(dlm_config.ci_cluster_name, buf, DLM_LOCKSPACE_LEN);
+	strncpy(cl->cl_cluster_name, buf, DLM_LOCKSPACE_LEN);
+	return len;
+}
+
+static struct cluster_attribute cluster_attr_cluster_name = {
+	.attr   = { .ca_owner = THIS_MODULE,
+                    .ca_name = "cluster_name",
+                    .ca_mode = S_IRUGO | S_IWUSR },
+	.show   = cluster_cluster_name_read,
+	.store  = cluster_cluster_name_write,
+};
+
 static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
 			   int *info_field, int check_zero,
 			   const char *buf, size_t len)
@@ -171,6 +198,7 @@ CLUSTER_ATTR(protocol, 0);
 CLUSTER_ATTR(timewarn_cs, 1);
 CLUSTER_ATTR(waitwarn_us, 0);
 CLUSTER_ATTR(new_rsb_count, 0);
+CLUSTER_ATTR(recover_callbacks, 0);
 
 static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -185,6 +213,8 @@ static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
 	[CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
 	[CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr,
+	[CLUSTER_ATTR_RECOVER_CALLBACKS] = &cluster_attr_recover_callbacks.attr,
+	[CLUSTER_ATTR_CLUSTER_NAME] = &cluster_attr_cluster_name.attr,
 	NULL,
 };
 
@@ -293,6 +323,7 @@ struct dlm_comms {
 
 struct dlm_comm {
 	struct config_item item;
+	int seq;
 	int nodeid;
 	int local;
 	int addr_count;
@@ -309,6 +340,7 @@ struct dlm_node {
 	int nodeid;
 	int weight;
 	int new;
+	int comm_seq; /* copy of cm->seq when nd->nodeid is set */
 };
 
 static struct configfs_group_operations clusters_ops = {
@@ -455,6 +487,9 @@ static struct config_group *make_cluster(struct config_group *g,
 	cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
 	cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
 	cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count;
+	cl->cl_recover_callbacks = dlm_config.ci_recover_callbacks;
+	memcpy(cl->cl_cluster_name, dlm_config.ci_cluster_name,
+	       DLM_LOCKSPACE_LEN);
 
 	space_list = &sps->ss_group;
 	comm_list = &cms->cs_group;
@@ -558,6 +593,11 @@ static struct config_item *make_comm(struct config_group *g, const char *name)
 		return ERR_PTR(-ENOMEM);
 
 	config_item_init_type_name(&cm->item, name, &comm_type);
+
+	cm->seq = dlm_comm_count++;
+	if (!cm->seq)
+		cm->seq = dlm_comm_count++;
+
 	cm->nodeid = -1;
 	cm->local = 0;
 	cm->addr_count = 0;
@@ -801,7 +841,10 @@ static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf)
 static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
 				 size_t len)
 {
+	uint32_t seq = 0;
 	nd->nodeid = simple_strtol(buf, NULL, 0);
+	dlm_comm_seq(nd->nodeid, &seq);
+	nd->comm_seq = seq;
 	return len;
 }
 
@@ -908,13 +951,13 @@ static void put_comm(struct dlm_comm *cm)
 }
 
 /* caller must free mem */
-int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
-		    int **new_out, int *new_count_out)
+int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
+		     int *count_out)
 {
 	struct dlm_space *sp;
 	struct dlm_node *nd;
-	int i = 0, rv = 0, ids_count = 0, new_count = 0;
-	int *ids, *new;
+	struct dlm_config_node *nodes, *node;
+	int rv, count;
 
 	sp = get_space(lsname);
 	if (!sp)
@@ -927,73 +970,42 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
 		goto out;
 	}
 
-	ids_count = sp->members_count;
+	count = sp->members_count;
 
-	ids = kcalloc(ids_count, sizeof(int), GFP_NOFS);
-	if (!ids) {
+	nodes = kcalloc(count, sizeof(struct dlm_config_node), GFP_NOFS);
+	if (!nodes) {
 		rv = -ENOMEM;
 		goto out;
 	}
 
+	node = nodes;
 	list_for_each_entry(nd, &sp->members, list) {
-		ids[i++] = nd->nodeid;
-		if (nd->new)
-			new_count++;
-	}
-
-	if (ids_count != i)
-		printk(KERN_ERR "dlm: bad nodeid count %d %d\n", ids_count, i);
-
-	if (!new_count)
-		goto out_ids;
+		node->nodeid = nd->nodeid;
+		node->weight = nd->weight;
+		node->new = nd->new;
+		node->comm_seq = nd->comm_seq;
+		node++;
 
-	new = kcalloc(new_count, sizeof(int), GFP_NOFS);
-	if (!new) {
-		kfree(ids);
-		rv = -ENOMEM;
-		goto out;
+		nd->new = 0;
 	}
 
-	i = 0;
-	list_for_each_entry(nd, &sp->members, list) {
-		if (nd->new) {
-			new[i++] = nd->nodeid;
-			nd->new = 0;
-		}
-	}
-	*new_count_out = new_count;
-	*new_out = new;
-
- out_ids:
-	*ids_count_out = ids_count;
-	*ids_out = ids;
+	*count_out = count;
+	*nodes_out = nodes;
+	rv = 0;
  out:
 	mutex_unlock(&sp->members_lock);
 	put_space(sp);
 	return rv;
 }
 
-int dlm_node_weight(char *lsname, int nodeid)
+int dlm_comm_seq(int nodeid, uint32_t *seq)
 {
-	struct dlm_space *sp;
-	struct dlm_node *nd;
-	int w = -EEXIST;
-
-	sp = get_space(lsname);
-	if (!sp)
-		goto out;
-
-	mutex_lock(&sp->members_lock);
-	list_for_each_entry(nd, &sp->members, list) {
-		if (nd->nodeid != nodeid)
-			continue;
-		w = nd->weight;
-		break;
-	}
-	mutex_unlock(&sp->members_lock);
-	put_space(sp);
- out:
-	return w;
+	struct dlm_comm *cm = get_comm(nodeid, NULL);
+	if (!cm)
+		return -EEXIST;
+	*seq = cm->seq;
+	put_comm(cm);
+	return 0;
 }
 
 int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr)
@@ -1047,6 +1059,8 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */
 #define DEFAULT_WAITWARN_US	   0
 #define DEFAULT_NEW_RSB_COUNT    128
+#define DEFAULT_RECOVER_CALLBACKS  0
+#define DEFAULT_CLUSTER_NAME      ""
 
 struct dlm_config_info dlm_config = {
 	.ci_tcp_port = DEFAULT_TCP_PORT,
@@ -1060,6 +1074,8 @@ struct dlm_config_info dlm_config = {
 	.ci_protocol = DEFAULT_PROTOCOL,
 	.ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
 	.ci_waitwarn_us = DEFAULT_WAITWARN_US,
-	.ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT
+	.ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT,
+	.ci_recover_callbacks = DEFAULT_RECOVER_CALLBACKS,
+	.ci_cluster_name = DEFAULT_CLUSTER_NAME
 };
 
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 3099d0dd26c0..9f5e3663bb0c 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -14,6 +14,13 @@
 #ifndef __CONFIG_DOT_H__
 #define __CONFIG_DOT_H__
 
+struct dlm_config_node {
+	int nodeid;
+	int weight;
+	int new;
+	uint32_t comm_seq;
+};
+
 #define DLM_MAX_ADDR_COUNT 3
 
 struct dlm_config_info {
@@ -29,15 +36,17 @@ struct dlm_config_info {
 	int ci_timewarn_cs;
 	int ci_waitwarn_us;
 	int ci_new_rsb_count;
+	int ci_recover_callbacks;
+	char ci_cluster_name[DLM_LOCKSPACE_LEN];
 };
 
 extern struct dlm_config_info dlm_config;
 
 int dlm_config_init(void);
 void dlm_config_exit(void);
-int dlm_node_weight(char *lsname, int nodeid);
-int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
-		    int **new_out, int *new_count_out);
+int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
+		     int *count_out);
+int dlm_comm_seq(int nodeid, uint32_t *seq);
 int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr);
 int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid);
 int dlm_our_nodeid(void);
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index f4d132c76908..3a564d197e99 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2010 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -119,28 +119,18 @@ struct dlm_member {
 	int			weight;
 	int			slot;
 	int			slot_prev;
+	int			comm_seq;
 	uint32_t		generation;
 };
 
-/*
- * low nodeid saves array of these in ls_slots
- */
-
-struct dlm_slot {
-	int			nodeid;
-	int			slot;
-};
-
 /*
  * Save and manage recovery state for a lockspace.
  */
 
 struct dlm_recover {
 	struct list_head	list;
-	int			*nodeids;   /* nodeids of all members */
-	int			node_count;
-	int			*new;       /* nodeids of new members */
-	int			new_count;
+	struct dlm_config_node	*nodes;
+	int			nodes_count;
 	uint64_t		seq;
 };
 
@@ -584,6 +574,9 @@ struct dlm_ls {
 	struct list_head	ls_root_list;	/* root resources */
 	struct rw_semaphore	ls_root_sem;	/* protect root_list */
 
+	const struct dlm_lockspace_ops *ls_ops;
+	void			*ls_ops_arg;
+
 	int			ls_namelen;
 	char			ls_name[1];
 };
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 1441f04bfabe..a1ea25face82 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -386,12 +386,15 @@ static void threads_stop(void)
 	dlm_lowcomms_stop();
 }
 
-static int new_lockspace(const char *name, int namelen, void **lockspace,
-			 uint32_t flags, int lvblen)
+static int new_lockspace(const char *name, const char *cluster,
+			 uint32_t flags, int lvblen,
+			 const struct dlm_lockspace_ops *ops, void *ops_arg,
+			 int *ops_result, dlm_lockspace_t **lockspace)
 {
 	struct dlm_ls *ls;
 	int i, size, error;
 	int do_unreg = 0;
+	int namelen = strlen(name);
 
 	if (namelen > DLM_LOCKSPACE_LEN)
 		return -EINVAL;
@@ -403,8 +406,24 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 		return -EINVAL;
 
 	if (!dlm_user_daemon_available()) {
-		module_put(THIS_MODULE);
-		return -EUNATCH;
+		log_print("dlm user daemon not available");
+		error = -EUNATCH;
+		goto out;
+	}
+
+	if (ops && ops_result) {
+	       	if (!dlm_config.ci_recover_callbacks)
+			*ops_result = -EOPNOTSUPP;
+		else
+			*ops_result = 0;
+	}
+
+	if (dlm_config.ci_recover_callbacks && cluster &&
+	    strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) {
+		log_print("dlm cluster name %s mismatch %s",
+			  dlm_config.ci_cluster_name, cluster);
+		error = -EBADR;
+		goto out;
 	}
 
 	error = 0;
@@ -442,6 +461,11 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 	ls->ls_flags = 0;
 	ls->ls_scan_time = jiffies;
 
+	if (ops && dlm_config.ci_recover_callbacks) {
+		ls->ls_ops = ops;
+		ls->ls_ops_arg = ops_arg;
+	}
+
 	if (flags & DLM_LSFL_TIMEWARN)
 		set_bit(LSFL_TIMEWARN, &ls->ls_flags);
 
@@ -619,8 +643,10 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
 	return error;
 }
 
-int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
-		      uint32_t flags, int lvblen)
+int dlm_new_lockspace(const char *name, const char *cluster,
+		      uint32_t flags, int lvblen,
+		      const struct dlm_lockspace_ops *ops, void *ops_arg,
+		      int *ops_result, dlm_lockspace_t **lockspace)
 {
 	int error = 0;
 
@@ -630,7 +656,8 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
 	if (error)
 		goto out;
 
-	error = new_lockspace(name, namelen, lockspace, flags, lvblen);
+	error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg,
+			      ops_result, lockspace);
 	if (!error)
 		ls_count++;
 	if (error > 0)
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index eebc52aae82e..862640a36d5c 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005-2009 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -27,7 +27,7 @@ int dlm_slots_version(struct dlm_header *h)
 }
 
 void dlm_slot_save(struct dlm_ls *ls, struct dlm_rcom *rc,
-		  struct dlm_member *memb)
+		   struct dlm_member *memb)
 {
 	struct rcom_config *rf = (struct rcom_config *)rc->rc_buf;
 
@@ -317,59 +317,51 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
 	}
 }
 
-static int dlm_add_member(struct dlm_ls *ls, int nodeid)
+static int dlm_add_member(struct dlm_ls *ls, struct dlm_config_node *node)
 {
 	struct dlm_member *memb;
-	int w, error;
+	int error;
 
 	memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
 	if (!memb)
 		return -ENOMEM;
 
-	w = dlm_node_weight(ls->ls_name, nodeid);
-	if (w < 0) {
-		kfree(memb);
-		return w;
-	}
-
-	error = dlm_lowcomms_connect_node(nodeid);
+	error = dlm_lowcomms_connect_node(node->nodeid);
 	if (error < 0) {
 		kfree(memb);
 		return error;
 	}
 
-	memb->nodeid = nodeid;
-	memb->weight = w;
+	memb->nodeid = node->nodeid;
+	memb->weight = node->weight;
+	memb->comm_seq = node->comm_seq;
 	add_ordered_member(ls, memb);
 	ls->ls_num_nodes++;
 	return 0;
 }
 
-static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb)
-{
-	list_move(&memb->list, &ls->ls_nodes_gone);
-	ls->ls_num_nodes--;
-}
-
-int dlm_is_member(struct dlm_ls *ls, int nodeid)
+static struct dlm_member *find_memb(struct list_head *head, int nodeid)
 {
 	struct dlm_member *memb;
 
-	list_for_each_entry(memb, &ls->ls_nodes, list) {
+	list_for_each_entry(memb, head, list) {
 		if (memb->nodeid == nodeid)
-			return 1;
+			return memb;
 	}
+	return NULL;
+}
+
+int dlm_is_member(struct dlm_ls *ls, int nodeid)
+{
+	if (find_memb(&ls->ls_nodes, nodeid))
+		return 1;
 	return 0;
 }
 
 int dlm_is_removed(struct dlm_ls *ls, int nodeid)
 {
-	struct dlm_member *memb;
-
-	list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
-		if (memb->nodeid == nodeid)
-			return 1;
-	}
+	if (find_memb(&ls->ls_nodes_gone, nodeid))
+		return 1;
 	return 0;
 }
 
@@ -460,10 +452,88 @@ static int ping_members(struct dlm_ls *ls)
 	return error;
 }
 
+static void dlm_lsop_recover_prep(struct dlm_ls *ls)
+{
+	if (!ls->ls_ops || !ls->ls_ops->recover_prep)
+		return;
+	ls->ls_ops->recover_prep(ls->ls_ops_arg);
+}
+
+static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb)
+{
+	struct dlm_slot slot;
+	uint32_t seq;
+	int error;
+
+	if (!ls->ls_ops || !ls->ls_ops->recover_slot)
+		return;
+
+	/* if there is no comms connection with this node
+	   or the present comms connection is newer
+	   than the one when this member was added, then
+	   we consider the node to have failed (versus
+	   being removed due to dlm_release_lockspace) */
+
+	error = dlm_comm_seq(memb->nodeid, &seq);
+
+	if (!error && seq == memb->comm_seq)
+		return;
+
+	slot.nodeid = memb->nodeid;
+	slot.slot = memb->slot;
+
+	ls->ls_ops->recover_slot(ls->ls_ops_arg, &slot);
+}
+
+void dlm_lsop_recover_done(struct dlm_ls *ls)
+{
+	struct dlm_member *memb;
+	struct dlm_slot *slots;
+	int i, num;
+
+	if (!ls->ls_ops || !ls->ls_ops->recover_done)
+		return;
+
+	num = ls->ls_num_nodes;
+
+	slots = kzalloc(num * sizeof(struct dlm_slot), GFP_KERNEL);
+	if (!slots)
+		return;
+
+	i = 0;
+	list_for_each_entry(memb, &ls->ls_nodes, list) {
+		if (i == num) {
+			log_error(ls, "dlm_lsop_recover_done bad num %d", num);
+			goto out;
+		}
+		slots[i].nodeid = memb->nodeid;
+		slots[i].slot = memb->slot;
+		i++;
+	}
+
+	ls->ls_ops->recover_done(ls->ls_ops_arg, slots, num,
+				 ls->ls_slot, ls->ls_generation);
+ out:
+	kfree(slots);
+}
+
+static struct dlm_config_node *find_config_node(struct dlm_recover *rv,
+						int nodeid)
+{
+	int i;
+
+	for (i = 0; i < rv->nodes_count; i++) {
+		if (rv->nodes[i].nodeid == nodeid)
+			return &rv->nodes[i];
+	}
+	return NULL;
+}
+
 int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
 {
 	struct dlm_member *memb, *safe;
-	int i, error, found, pos = 0, neg = 0, low = -1;
+	struct dlm_config_node *node;
+	int i, error, neg = 0, low = -1;
 
 	/* previously removed members that we've not finished removing need to
 	   count as a negative change so the "neg" recovery steps will happen */
@@ -476,46 +546,32 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
 	/* move departed members from ls_nodes to ls_nodes_gone */
 
 	list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
-		found = 0;
-		for (i = 0; i < rv->node_count; i++) {
-			if (memb->nodeid == rv->nodeids[i]) {
-				found = 1;
-				break;
-			}
-		}
+		node = find_config_node(rv, memb->nodeid);
+		if (node && !node->new)
+			continue;
 
-		if (!found) {
-			neg++;
-			dlm_remove_member(ls, memb);
+		if (!node) {
 			log_debug(ls, "remove member %d", memb->nodeid);
+		} else {
+			/* removed and re-added */
+			log_debug(ls, "remove member %d comm_seq %u %u",
+				  memb->nodeid, memb->comm_seq, node->comm_seq);
 		}
-	}
-
-	/* Add an entry to ls_nodes_gone for members that were removed and
-	   then added again, so that previous state for these nodes will be
-	   cleared during recovery. */
 
-	for (i = 0; i < rv->new_count; i++) {
-		if (!dlm_is_member(ls, rv->new[i]))
-			continue;
-		log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
-
-		memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
-		if (!memb)
-			return -ENOMEM;
-		memb->nodeid = rv->new[i];
-		list_add_tail(&memb->list, &ls->ls_nodes_gone);
 		neg++;
+		list_move(&memb->list, &ls->ls_nodes_gone);
+		ls->ls_num_nodes--;
+		dlm_lsop_recover_slot(ls, memb);
 	}
 
 	/* add new members to ls_nodes */
 
-	for (i = 0; i < rv->node_count; i++) {
-		if (dlm_is_member(ls, rv->nodeids[i]))
+	for (i = 0; i < rv->nodes_count; i++) {
+		node = &rv->nodes[i];
+		if (dlm_is_member(ls, node->nodeid))
 			continue;
-		dlm_add_member(ls, rv->nodeids[i]);
-		pos++;
-		log_debug(ls, "add member %d", rv->nodeids[i]);
+		dlm_add_member(ls, node);
+		log_debug(ls, "add member %d", node->nodeid);
 	}
 
 	list_for_each_entry(memb, &ls->ls_nodes, list) {
@@ -609,21 +665,22 @@ int dlm_ls_stop(struct dlm_ls *ls)
 
 	if (!ls->ls_recover_begin)
 		ls->ls_recover_begin = jiffies;
+
+	dlm_lsop_recover_prep(ls);
 	return 0;
 }
 
 int dlm_ls_start(struct dlm_ls *ls)
 {
 	struct dlm_recover *rv = NULL, *rv_old;
-	int *ids = NULL, *new = NULL;
-	int error, ids_count = 0, new_count = 0;
+	struct dlm_config_node *nodes;
+	int error, count;
 
 	rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS);
 	if (!rv)
 		return -ENOMEM;
 
-	error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count,
-				&new, &new_count);
+	error = dlm_config_nodes(ls->ls_name, &nodes, &count);
 	if (error < 0)
 		goto fail;
 
@@ -638,10 +695,8 @@ int dlm_ls_start(struct dlm_ls *ls)
 		goto fail;
 	}
 
-	rv->nodeids = ids;
-	rv->node_count = ids_count;
-	rv->new = new;
-	rv->new_count = new_count;
+	rv->nodes = nodes;
+	rv->nodes_count = count;
 	rv->seq = ++ls->ls_recover_seq;
 	rv_old = ls->ls_recover_args;
 	ls->ls_recover_args = rv;
@@ -649,9 +704,8 @@ int dlm_ls_start(struct dlm_ls *ls)
 
 	if (rv_old) {
 		log_error(ls, "unused recovery %llx %d",
-			  (unsigned long long)rv_old->seq, rv_old->node_count);
-		kfree(rv_old->nodeids);
-		kfree(rv_old->new);
+			  (unsigned long long)rv_old->seq, rv_old->nodes_count);
+		kfree(rv_old->nodes);
 		kfree(rv_old);
 	}
 
@@ -660,8 +714,7 @@ int dlm_ls_start(struct dlm_ls *ls)
 
  fail:
 	kfree(rv);
-	kfree(ids);
-	kfree(new);
+	kfree(nodes);
 	return error;
 }
 
diff --git a/fs/dlm/member.h b/fs/dlm/member.h
index 7e87e8a79dfd..3deb70661c69 100644
--- a/fs/dlm/member.h
+++ b/fs/dlm/member.h
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -27,6 +27,7 @@ void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc);
 int dlm_slots_copy_in(struct dlm_ls *ls);
 int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size,
 		     struct dlm_slot **slots_out, uint32_t *gen_out);
+void dlm_lsop_recover_done(struct dlm_ls *ls);
 
 #endif                          /* __MEMBER_DOT_H__ */
 
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 5a9e1a49a860..3780caf7ae0c 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -227,11 +227,12 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
 	dlm_grant_after_purge(ls);
 
-	log_debug(ls, "dlm_recover %llx done: %u ms",
-		  (unsigned long long)rv->seq,
+	log_debug(ls, "dlm_recover %llx generation %u done: %u ms",
+		  (unsigned long long)rv->seq, ls->ls_generation,
 		  jiffies_to_msecs(jiffies - start));
 	mutex_unlock(&ls->ls_recoverd_active);
 
+	dlm_lsop_recover_done(ls);
 	return 0;
 
  fail:
@@ -259,8 +260,7 @@ static void do_ls_recovery(struct dlm_ls *ls)
 
 	if (rv) {
 		ls_recover(ls, rv);
-		kfree(rv->nodeids);
-		kfree(rv->new);
+		kfree(rv->nodes);
 		kfree(rv);
 	}
 }
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index d8ea60756403..eb4ed9ba3098 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -392,8 +392,9 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	error = dlm_new_lockspace(params->name, strlen(params->name),
-				  &lockspace, params->flags, DLM_USER_LVB_LEN);
+	error = dlm_new_lockspace(params->name, NULL, params->flags,
+				  DLM_USER_LVB_LEN, NULL, NULL, NULL,
+				  &lockspace);
 	if (error)
 		return error;
 
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 98c80d8c2a62..ce85b62bc0a2 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -195,10 +195,10 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname)
 		return -EINVAL;
 	}
 
-	error = dlm_new_lockspace(fsname, strlen(fsname), &ls->ls_dlm,
+	error = dlm_new_lockspace(fsname, NULL, 
 				  DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
 				  (ls->ls_nodir ? DLM_LSFL_NODIR : 0),
-				  GDLM_LVB_SIZE);
+				  GDLM_LVB_SIZE, NULL, NULL, NULL, &ls->ls_dlm);
 	if (error)
 		printk(KERN_ERR "dlm_new_lockspace error %d", error);
 
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index a5ebe421195f..286edf1e231f 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -827,8 +827,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
 		goto out;
 	}
 
-	rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name),
-			       &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN);
+	rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN,
+			       NULL, NULL, NULL, &fsdlm);
 	if (rc) {
 		ocfs2_live_connection_drop(control);
 		goto out;
diff --git a/include/linux/dlm.h b/include/linux/dlm.h
index d4e02f5353a0..6c7f6e9546c7 100644
--- a/include/linux/dlm.h
+++ b/include/linux/dlm.h
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -74,15 +74,76 @@ struct dlm_lksb {
 
 #ifdef __KERNEL__
 
+struct dlm_slot {
+	int nodeid; /* 1 to MAX_INT */
+	int slot;   /* 1 to MAX_INT */
+};
+
+/*
+ * recover_prep: called before the dlm begins lock recovery.
+ *   Notifies lockspace user that locks from failed members will be granted.
+ * recover_slot: called after recover_prep and before recover_done.
+ *   Identifies a failed lockspace member.
+ * recover_done: called after the dlm completes lock recovery.
+ *   Identifies lockspace members and lockspace generation number.
+ */
+
+struct dlm_lockspace_ops {
+	void (*recover_prep) (void *ops_arg);
+	void (*recover_slot) (void *ops_arg, struct dlm_slot *slot);
+	void (*recover_done) (void *ops_arg, struct dlm_slot *slots,
+			      int num_slots, int our_slot, uint32_t generation);
+};
+
 /*
  * dlm_new_lockspace
  *
- * Starts a lockspace with the given name.  If the named lockspace exists in
- * the cluster, the calling node joins it.
+ * Create/join a lockspace.
+ *
+ * name: lockspace name, null terminated, up to DLM_LOCKSPACE_LEN (not
+ *   including terminating null).
+ *
+ * cluster: cluster name, null terminated, up to DLM_LOCKSPACE_LEN (not
+ *   including terminating null).  Optional.  When cluster is null, it
+ *   is not used.  When set, dlm_new_lockspace() returns -EBADR if cluster
+ *   is not equal to the dlm cluster name.
+ *
+ * flags:
+ * DLM_LSFL_NODIR
+ *   The dlm should not use a resource directory, but statically assign
+ *   resource mastery to nodes based on the name hash that is otherwise
+ *   used to select the directory node.  Must be the same on all nodes.
+ * DLM_LSFL_TIMEWARN
+ *   The dlm should emit netlink messages if locks have been waiting
+ *   for a configurable amount of time.  (Unused.)
+ * DLM_LSFL_FS
+ *   The lockspace user is in the kernel (i.e. filesystem).  Enables
+ *   direct bast/cast callbacks.
+ * DLM_LSFL_NEWEXCL
+ *   dlm_new_lockspace() should return -EEXIST if the lockspace exists.
+ *
+ * lvblen: length of lvb in bytes.  Must be multiple of 8.
+ *   dlm_new_lockspace() returns an error if this does not match
+ *   what other nodes are using.
+ *
+ * ops: callbacks that indicate lockspace recovery points so the
+ *   caller can coordinate its recovery and know lockspace members.
+ *   This is only used by the initial dlm_new_lockspace() call.
+ *   Optional.
+ *
+ * ops_arg: arg for ops callbacks.
+ *
+ * ops_result: tells caller if the ops callbacks (if provided) will
+ *   be used or not.  0: will be used, -EXXX will not be used.
+ *   -EOPNOTSUPP: the dlm does not have recovery_callbacks enabled.
+ *
+ * lockspace: handle for dlm functions
  */
 
-int dlm_new_lockspace(const char *name, int namelen,
-		      dlm_lockspace_t **lockspace, uint32_t flags, int lvblen);
+int dlm_new_lockspace(const char *name, const char *cluster,
+		      uint32_t flags, int lvblen,
+		      const struct dlm_lockspace_ops *ops, void *ops_arg,
+		      int *ops_result, dlm_lockspace_t **lockspace);
 
 /*
  * dlm_release_lockspace
-- 
cgit v1.2.3


From f2ab2ba09e081fbce068c0adc205ad3f25a3b626 Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Fri, 9 Dec 2011 14:11:41 -0600
Subject: gpio: pl061: convert to use 0 for no irq

We don't want drivers using NO_IRQ, so remove its use. For now, 0 or
-1 means no irq until platforms are converted to use 0.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Linus Walleij <linus.ml.walleij@gmail.com>
---
 drivers/gpio/gpio-pl061.c  | 8 ++++----
 include/linux/amba/pl061.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c
index 0f718f9bbd8c..fe19dec4b117 100644
--- a/drivers/gpio/gpio-pl061.c
+++ b/drivers/gpio/gpio-pl061.c
@@ -53,7 +53,7 @@ struct pl061_gpio {
 	spinlock_t		irq_lock;	/* IRQ registers */
 
 	void __iomem		*base;
-	unsigned		irq_base;
+	int			irq_base;
 	struct gpio_chip	gc;
 };
 
@@ -119,7 +119,7 @@ static int pl061_to_irq(struct gpio_chip *gc, unsigned offset)
 {
 	struct pl061_gpio *chip = container_of(gc, struct pl061_gpio, gc);
 
-	if (chip->irq_base == NO_IRQ)
+	if (chip->irq_base <= 0)
 		return -EINVAL;
 
 	return chip->irq_base + offset;
@@ -250,7 +250,7 @@ static int pl061_probe(struct amba_device *dev, const struct amba_id *id)
 		chip->irq_base = pdata->irq_base;
 	} else if (dev->dev.of_node) {
 		chip->gc.base = -1;
-		chip->irq_base = NO_IRQ;
+		chip->irq_base = 0;
 	} else {
 		ret = -ENODEV;
 		goto free_mem;
@@ -290,7 +290,7 @@ static int pl061_probe(struct amba_device *dev, const struct amba_id *id)
 	 * irq_chip support
 	 */
 
-	if (chip->irq_base == NO_IRQ)
+	if (chip->irq_base <= 0)
 		return 0;
 
 	writeb(0, chip->base + GPIOIE); /* disable irqs */
diff --git a/include/linux/amba/pl061.h b/include/linux/amba/pl061.h
index 2412af944f1f..fb83c0453489 100644
--- a/include/linux/amba/pl061.h
+++ b/include/linux/amba/pl061.h
@@ -7,7 +7,7 @@ struct pl061_platform_data {
 	unsigned	gpio_base;
 
 	/* number of the first IRQ.
-	 * If the IRQ functionality in not desired this must be set to NO_IRQ.
+	 * If the IRQ functionality is not desired this must be set to 0.
 	 */
 	unsigned	irq_base;
 
-- 
cgit v1.2.3


From cd7d494d0b23673215330963c28138dd0c3fd405 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Sun, 1 Jan 2012 16:11:17 -0300
Subject: [media] dvb: deprecate the usage of ops->info.type

Mark info.type as deprecated inside the header, recommending
the usage of DTV_ENUM_DELSYS DVBv5 command instead.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/dvb/frontend.xml | 4 ++++
 include/linux/dvb/frontend.h                 | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml
index 28d7ea5d5e73..aeaed59d0f1f 100644
--- a/Documentation/DocBook/media/dvb/frontend.xml
+++ b/Documentation/DocBook/media/dvb/frontend.xml
@@ -63,6 +63,10 @@ transmission. The fontend types are given by fe_type_t type, defined as:</para>
 <para>Newer formats like DVB-S2, ISDB-T, ISDB-S and DVB-T2 are not described at the above, as they're
 supported via the new <link linkend="FE_GET_SET_PROPERTY">FE_GET_PROPERTY/FE_GET_SET_PROPERTY</link> ioctl's, using the <link linkend="DTV-DELIVERY-SYSTEM">DTV_DELIVERY_SYSTEM</link> parameter.
 </para>
+
+<para>The usage of this field is deprecated, as it doesn't report all supported standards, and
+will provide incomplete information for frontends that support multiple delivery systems.
+Please use <link linkend="DTV_ENUM_DELSYS">DTV_ENUM_DELSYS</link> instead.</para>
 </section>
 
 <section id="fe-caps-t">
diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index 7e7cb64f56d8..cb4428ab81ed 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -72,7 +72,7 @@ typedef enum fe_caps {
 
 struct dvb_frontend_info {
 	char       name[128];
-	fe_type_t  type;
+	fe_type_t  type;			/* DEPRECATED. Use DTV_ENUM_DELSYS instead */
 	__u32      frequency_min;
 	__u32      frequency_max;
 	__u32      frequency_stepsize;
-- 
cgit v1.2.3


From c899afedf168b6735911997d8366b7f23e7e59bc Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 4 Jan 2012 22:18:42 -0800
Subject: Input: ucb1400_ts - convert to threaded IRQ

Instead of manually creating and handling a kernel thread, switch to a
threaded IRQ and let the kernel IRQ core manage the thread for us.

Acked-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/ucb1400_ts.c | 235 ++++++++++++++++-----------------
 include/linux/ucb1400.h                |   6 +-
 2 files changed, 115 insertions(+), 126 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c
index 36ff1549434b..5162f4e34252 100644
--- a/drivers/input/touchscreen/ucb1400_ts.c
+++ b/drivers/input/touchscreen/ucb1400_ts.c
@@ -20,24 +20,24 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/completion.h>
 #include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
 #include <linux/input.h>
 #include <linux/device.h>
 #include <linux/interrupt.h>
-#include <linux/suspend.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
 #include <linux/ucb1400.h>
 
+#define UCB1400_TS_POLL_PERIOD	10 /* ms */
+
 static int adcsync;
 static int ts_delay = 55; /* us */
 static int ts_delay_pressure;	/* us */
 
 /* Switch to interrupt mode. */
-static void ucb1400_ts_mode_int(struct snd_ac97 *ac97)
+static void ucb1400_ts_mode_int(struct ucb1400_ts *ucb)
 {
-	ucb1400_reg_write(ac97, UCB_TS_CR,
+	ucb1400_reg_write(ucb->ac97, UCB_TS_CR,
 			UCB_TS_CR_TSMX_POW | UCB_TS_CR_TSPX_POW |
 			UCB_TS_CR_TSMY_GND | UCB_TS_CR_TSPY_GND |
 			UCB_TS_CR_MODE_INT);
@@ -53,7 +53,9 @@ static unsigned int ucb1400_ts_read_pressure(struct ucb1400_ts *ucb)
 			UCB_TS_CR_TSMX_POW | UCB_TS_CR_TSPX_POW |
 			UCB_TS_CR_TSMY_GND | UCB_TS_CR_TSPY_GND |
 			UCB_TS_CR_MODE_PRES | UCB_TS_CR_BIAS_ENA);
+
 	udelay(ts_delay_pressure);
+
 	return ucb1400_adc_read(ucb->ac97, UCB_ADC_INP_TSPY, adcsync);
 }
 
@@ -127,26 +129,26 @@ static unsigned int ucb1400_ts_read_yres(struct ucb1400_ts *ucb)
 	return ucb1400_adc_read(ucb->ac97, 0, adcsync);
 }
 
-static int ucb1400_ts_pen_up(struct snd_ac97 *ac97)
+static int ucb1400_ts_pen_up(struct ucb1400_ts *ucb)
 {
-	unsigned short val = ucb1400_reg_read(ac97, UCB_TS_CR);
+	unsigned short val = ucb1400_reg_read(ucb->ac97, UCB_TS_CR);
 
 	return val & (UCB_TS_CR_TSPX_LOW | UCB_TS_CR_TSMX_LOW);
 }
 
-static void ucb1400_ts_irq_enable(struct snd_ac97 *ac97)
+static void ucb1400_ts_irq_enable(struct ucb1400_ts *ucb)
 {
-	ucb1400_reg_write(ac97, UCB_IE_CLEAR, UCB_IE_TSPX);
-	ucb1400_reg_write(ac97, UCB_IE_CLEAR, 0);
-	ucb1400_reg_write(ac97, UCB_IE_FAL, UCB_IE_TSPX);
+	ucb1400_reg_write(ucb->ac97, UCB_IE_CLEAR, UCB_IE_TSPX);
+	ucb1400_reg_write(ucb->ac97, UCB_IE_CLEAR, 0);
+	ucb1400_reg_write(ucb->ac97, UCB_IE_FAL, UCB_IE_TSPX);
 }
 
-static void ucb1400_ts_irq_disable(struct snd_ac97 *ac97)
+static void ucb1400_ts_irq_disable(struct ucb1400_ts *ucb)
 {
-	ucb1400_reg_write(ac97, UCB_IE_FAL, 0);
+	ucb1400_reg_write(ucb->ac97, UCB_IE_FAL, 0);
 }
 
-static void ucb1400_ts_evt_add(struct input_dev *idev, u16 pressure, u16 x, u16 y)
+static void ucb1400_ts_report_event(struct input_dev *idev, u16 pressure, u16 x, u16 y)
 {
 	input_report_abs(idev, ABS_X, x);
 	input_report_abs(idev, ABS_Y, y);
@@ -162,7 +164,7 @@ static void ucb1400_ts_event_release(struct input_dev *idev)
 	input_sync(idev);
 }
 
-static void ucb1400_handle_pending_irq(struct ucb1400_ts *ucb)
+static void ucb1400_clear_pending_irq(struct ucb1400_ts *ucb)
 {
 	unsigned int isr;
 
@@ -171,32 +173,34 @@ static void ucb1400_handle_pending_irq(struct ucb1400_ts *ucb)
 	ucb1400_reg_write(ucb->ac97, UCB_IE_CLEAR, 0);
 
 	if (isr & UCB_IE_TSPX)
-		ucb1400_ts_irq_disable(ucb->ac97);
+		ucb1400_ts_irq_disable(ucb);
 	else
-		dev_dbg(&ucb->ts_idev->dev, "ucb1400: unexpected IE_STATUS = %#x\n", isr);
-	enable_irq(ucb->irq);
+		dev_dbg(&ucb->ts_idev->dev,
+			"ucb1400: unexpected IE_STATUS = %#x\n", isr);
 }
 
-static int ucb1400_ts_thread(void *_ucb)
+/*
+ * A restriction with interrupts exists when using the ucb1400, as
+ * the codec read/write routines may sleep while waiting for codec
+ * access completion and uses semaphores for access control to the
+ * AC97 bus. Therefore the driver is forced to use threaded interrupt
+ * handler.
+ */
+static irqreturn_t ucb1400_irq(int irqnr, void *devid)
 {
-	struct ucb1400_ts *ucb = _ucb;
-	struct task_struct *tsk = current;
-	int valid = 0;
-	struct sched_param param = { .sched_priority = 1 };
+	struct ucb1400_ts *ucb = devid;
+	unsigned int x, y, p;
+	bool penup;
 
-	sched_setscheduler(tsk, SCHED_FIFO, &param);
+	if (unlikely(irqnr != ucb->irq))
+		return IRQ_NONE;
 
-	set_freezable();
-	while (!kthread_should_stop()) {
-		unsigned int x, y, p;
-		long timeout;
+	ucb1400_clear_pending_irq(ucb);
 
-		ucb->ts_restart = 0;
+	/* Start with a small delay before checking pendown state */
+	msleep(UCB1400_TS_POLL_PERIOD);
 
-		if (ucb->irq_pending) {
-			ucb->irq_pending = 0;
-			ucb1400_handle_pending_irq(ucb);
-		}
+	while (!ucb->stopped && !(penup = ucb1400_ts_pen_up(ucb))) {
 
 		ucb1400_adc_enable(ucb->ac97);
 		x = ucb1400_ts_read_xpos(ucb);
@@ -204,91 +208,62 @@ static int ucb1400_ts_thread(void *_ucb)
 		p = ucb1400_ts_read_pressure(ucb);
 		ucb1400_adc_disable(ucb->ac97);
 
-		/* Switch back to interrupt mode. */
-		ucb1400_ts_mode_int(ucb->ac97);
-
-		msleep(10);
-
-		if (ucb1400_ts_pen_up(ucb->ac97)) {
-			ucb1400_ts_irq_enable(ucb->ac97);
-
-			/*
-			 * If we spat out a valid sample set last time,
-			 * spit out a "pen off" sample here.
-			 */
-			if (valid) {
-				ucb1400_ts_event_release(ucb->ts_idev);
-				valid = 0;
-			}
-
-			timeout = MAX_SCHEDULE_TIMEOUT;
-		} else {
-			valid = 1;
-			ucb1400_ts_evt_add(ucb->ts_idev, p, x, y);
-			timeout = msecs_to_jiffies(10);
-		}
+		ucb1400_ts_report_event(ucb->ts_idev, p, x, y);
 
-		wait_event_freezable_timeout(ucb->ts_wait,
-			ucb->irq_pending || ucb->ts_restart ||
-			kthread_should_stop(), timeout);
+		wait_event_timeout(ucb->ts_wait, ucb->stopped,
+				   msecs_to_jiffies(UCB1400_TS_POLL_PERIOD));
 	}
 
-	/* Send the "pen off" if we are stopping with the pen still active */
-	if (valid)
-		ucb1400_ts_event_release(ucb->ts_idev);
+	ucb1400_ts_event_release(ucb->ts_idev);
 
-	ucb->ts_task = NULL;
-	return 0;
+	if (!ucb->stopped) {
+		/* Switch back to interrupt mode. */
+		ucb1400_ts_mode_int(ucb);
+		ucb1400_ts_irq_enable(ucb);
+	}
+
+	return IRQ_HANDLED;
 }
 
-/*
- * A restriction with interrupts exists when using the ucb1400, as
- * the codec read/write routines may sleep while waiting for codec
- * access completion and uses semaphores for access control to the
- * AC97 bus.  A complete codec read cycle could take  anywhere from
- * 60 to 100uSec so we *definitely* don't want to spin inside the
- * interrupt handler waiting for codec access.  So, we handle the
- * interrupt by scheduling a RT kernel thread to run in process
- * context instead of interrupt context.
- */
-static irqreturn_t ucb1400_hard_irq(int irqnr, void *devid)
+static void ucb1400_ts_stop(struct ucb1400_ts *ucb)
 {
-	struct ucb1400_ts *ucb = devid;
+	/* Signal IRQ thread to stop polling and disable the handler. */
+	ucb->stopped = true;
+	mb();
+	wake_up(&ucb->ts_wait);
+	disable_irq(ucb->irq);
 
-	if (irqnr == ucb->irq) {
-		disable_irq_nosync(ucb->irq);
-		ucb->irq_pending = 1;
-		wake_up(&ucb->ts_wait);
-		return IRQ_HANDLED;
-	}
-	return IRQ_NONE;
+	ucb1400_ts_irq_disable(ucb);
+	ucb1400_reg_write(ucb->ac97, UCB_TS_CR, 0);
+}
+
+/* Must be called with ts->lock held */
+static void ucb1400_ts_start(struct ucb1400_ts *ucb)
+{
+	/* Tell IRQ thread that it may poll the device. */
+	ucb->stopped = false;
+	mb();
+
+	ucb1400_ts_mode_int(ucb);
+	ucb1400_ts_irq_enable(ucb);
+
+	enable_irq(ucb->irq);
 }
 
 static int ucb1400_ts_open(struct input_dev *idev)
 {
 	struct ucb1400_ts *ucb = input_get_drvdata(idev);
-	int ret = 0;
-
-	BUG_ON(ucb->ts_task);
 
-	ucb->ts_task = kthread_run(ucb1400_ts_thread, ucb, "UCB1400_ts");
-	if (IS_ERR(ucb->ts_task)) {
-		ret = PTR_ERR(ucb->ts_task);
-		ucb->ts_task = NULL;
-	}
+	ucb1400_ts_start(ucb);
 
-	return ret;
+	return 0;
 }
 
 static void ucb1400_ts_close(struct input_dev *idev)
 {
 	struct ucb1400_ts *ucb = input_get_drvdata(idev);
 
-	if (ucb->ts_task)
-		kthread_stop(ucb->ts_task);
-
-	ucb1400_ts_irq_disable(ucb->ac97);
-	ucb1400_reg_write(ucb->ac97, UCB_TS_CR, 0);
+	ucb1400_ts_stop(ucb);
 }
 
 #ifndef NO_IRQ
@@ -342,11 +317,11 @@ static int __devinit ucb1400_ts_detect_irq(struct ucb1400_ts *ucb)
 	return 0;
 }
 
-static int __devinit ucb1400_ts_probe(struct platform_device *dev)
+static int __devinit ucb1400_ts_probe(struct platform_device *pdev)
 {
+	struct ucb1400_ts *ucb = pdev->dev.platform_data;
 	int error, x_res, y_res;
 	u16 fcsr;
-	struct ucb1400_ts *ucb = dev->dev.platform_data;
 
 	ucb->ts_idev = input_allocate_device();
 	if (!ucb->ts_idev) {
@@ -362,21 +337,13 @@ static int __devinit ucb1400_ts_probe(struct platform_device *dev)
 			goto err_free_devs;
 		}
 	}
+	printk(KERN_DEBUG "UCB1400: found IRQ %d\n", ucb->irq);
 
 	init_waitqueue_head(&ucb->ts_wait);
 
-	error = request_irq(ucb->irq, ucb1400_hard_irq, IRQF_TRIGGER_RISING,
-				"UCB1400", ucb);
-	if (error) {
-		printk(KERN_ERR "ucb1400: unable to grab irq%d: %d\n",
-				ucb->irq, error);
-		goto err_free_devs;
-	}
-	printk(KERN_DEBUG "UCB1400: found IRQ %d\n", ucb->irq);
-
 	input_set_drvdata(ucb->ts_idev, ucb);
 
-	ucb->ts_idev->dev.parent	= &dev->dev;
+	ucb->ts_idev->dev.parent	= &pdev->dev;
 	ucb->ts_idev->name		= "UCB1400 touchscreen interface";
 	ucb->ts_idev->id.vendor		= ucb1400_reg_read(ucb->ac97,
 						AC97_VENDOR_ID1);
@@ -404,6 +371,17 @@ static int __devinit ucb1400_ts_probe(struct platform_device *dev)
 	input_set_abs_params(ucb->ts_idev, ABS_Y, 0, y_res, 0, 0);
 	input_set_abs_params(ucb->ts_idev, ABS_PRESSURE, 0, 0, 0, 0);
 
+	ucb1400_ts_stop(ucb);
+
+	error = request_threaded_irq(ucb->irq, NULL, ucb1400_irq,
+				     IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+				     "UCB1400", ucb);
+	if (error) {
+		printk(KERN_ERR "ucb1400: unable to grab irq%d: %d\n",
+				ucb->irq, error);
+		goto err_free_devs;
+	}
+
 	error = input_register_device(ucb->ts_idev);
 	if (error)
 		goto err_free_irq;
@@ -418,9 +396,9 @@ err:
 	return error;
 }
 
-static int __devexit ucb1400_ts_remove(struct platform_device *dev)
+static int __devexit ucb1400_ts_remove(struct platform_device *pdev)
 {
-	struct ucb1400_ts *ucb = dev->dev.platform_data;
+	struct ucb1400_ts *ucb = pdev->dev.platform_data;
 
 	free_irq(ucb->irq, ucb);
 	input_unregister_device(ucb->ts_idev);
@@ -429,24 +407,37 @@ static int __devexit ucb1400_ts_remove(struct platform_device *dev)
 }
 
 #ifdef CONFIG_PM_SLEEP
+static int ucb1400_ts_suspend(struct device *dev)
+{
+	struct ucb1400_ts *ucb = dev->platform_data;
+	struct input_dev *idev = ucb->ts_idev;
+
+	mutex_lock(&idev->mutex);
+
+	if (idev->users)
+		ucb1400_ts_stop(ucb);	/* quiesce while suspended */
+
+	mutex_unlock(&idev->mutex);
+	return 0;
+}
+
 static int ucb1400_ts_resume(struct device *dev)
 {
 	struct ucb1400_ts *ucb = dev->platform_data;
+	struct input_dev *idev = ucb->ts_idev;
 
-	if (ucb->ts_task) {
-		/*
-		 * Restart the TS thread to ensure the
-		 * TS interrupt mode is set up again
-		 * after sleep.
-		 */
-		ucb->ts_restart = 1;
-		wake_up(&ucb->ts_wait);
-	}
+	mutex_lock(&idev->mutex);
+
+	if (idev->users)
+		ucb1400_ts_start(ucb);	/* re-arm interrupt mode */
+
+	mutex_unlock(&idev->mutex);
 	return 0;
 }
 #endif
 
-static SIMPLE_DEV_PM_OPS(ucb1400_ts_pm_ops, NULL, ucb1400_ts_resume);
+static SIMPLE_DEV_PM_OPS(ucb1400_ts_pm_ops,
+			 ucb1400_ts_suspend, ucb1400_ts_resume);
 
 static struct platform_driver ucb1400_ts_driver = {
 	.probe	= ucb1400_ts_probe,
diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h
index 5c75153f9441..d21b33c4c6ca 100644
--- a/include/linux/ucb1400.h
+++ b/include/linux/ucb1400.h
@@ -96,13 +96,11 @@ struct ucb1400_gpio {
 
 struct ucb1400_ts {
 	struct input_dev	*ts_idev;
-	struct task_struct	*ts_task;
 	int			id;
-	wait_queue_head_t	ts_wait;
-	unsigned int		ts_restart:1;
 	int			irq;
-	unsigned int		irq_pending;	/* not bit field shared */
 	struct snd_ac97		*ac97;
+	wait_queue_head_t	ts_wait;
+	bool			stopped;
 };
 
 struct ucb1400 {
-- 
cgit v1.2.3


From 68c97153fb7f2877f98aa6c29546381d9cad2fed Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 3 Jan 2012 13:22:46 -0500
Subject: SUNRPC: Clean up the RPCSEC_GSS service ticket requests

Instead of hacking specific service names into gss_encode_v1_msg, we should
just allow the caller to specify the service name explicitly.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfs/client.c                 |  2 +-
 fs/nfsd/nfs4callback.c          |  2 +-
 include/linux/sunrpc/auth.h     |  3 ++-
 include/linux/sunrpc/auth_gss.h |  2 +-
 net/sunrpc/auth_generic.c       |  6 ++++--
 net/sunrpc/auth_gss/auth_gss.c  | 40 +++++++++++++++++++++++-----------------
 6 files changed, 32 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 873bf00d51a2..32ea37198e93 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -185,7 +185,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	clp->cl_minorversion = cl_init->minorversion;
 	clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
 #endif
-	cred = rpc_lookup_machine_cred();
+	cred = rpc_lookup_machine_cred("*");
 	if (!IS_ERR(cred))
 		clp->cl_machine_cred = cred;
 	nfs_fscache_get_client_cookie(clp);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7748d6a18d97..6f3ebb48b12f 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -718,7 +718,7 @@ int set_callback_cred(void)
 {
 	if (callback_cred)
 		return 0;
-	callback_cred = rpc_lookup_machine_cred();
+	callback_cred = rpc_lookup_machine_cred("nfs");
 	if (!callback_cred)
 		return -ENOMEM;
 	return 0;
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index febc4dbec2ca..7874a8a56638 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -26,6 +26,7 @@ struct auth_cred {
 	uid_t	uid;
 	gid_t	gid;
 	struct group_info *group_info;
+	const char *principal;
 	unsigned char machine_cred : 1;
 };
 
@@ -127,7 +128,7 @@ void			rpc_destroy_generic_auth(void);
 void 			rpc_destroy_authunix(void);
 
 struct rpc_cred *	rpc_lookup_cred(void);
-struct rpc_cred *	rpc_lookup_machine_cred(void);
+struct rpc_cred *	rpc_lookup_machine_cred(const char *service_name);
 int			rpcauth_register(const struct rpc_authops *);
 int			rpcauth_unregister(const struct rpc_authops *);
 struct rpc_auth *	rpcauth_create(rpc_authflavor_t, struct rpc_clnt *);
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index 8eee9dbbfe7a..f1cfd4c85cd0 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -82,8 +82,8 @@ struct gss_cred {
 	enum rpc_gss_svc	gc_service;
 	struct gss_cl_ctx __rcu	*gc_ctx;
 	struct gss_upcall_msg	*gc_upcall;
+	const char		*gc_principal;
 	unsigned long		gc_upcall_timestamp;
-	unsigned char		gc_machine_cred : 1;
 };
 
 #endif /* __KERNEL__ */
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index e010a015d996..1426ec3d0a53 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -41,15 +41,17 @@ EXPORT_SYMBOL_GPL(rpc_lookup_cred);
 /*
  * Public call interface for looking up machine creds.
  */
-struct rpc_cred *rpc_lookup_machine_cred(void)
+struct rpc_cred *rpc_lookup_machine_cred(const char *service_name)
 {
 	struct auth_cred acred = {
 		.uid = RPC_MACHINE_CRED_USERID,
 		.gid = RPC_MACHINE_CRED_GROUPID,
+		.principal = service_name,
 		.machine_cred = 1,
 	};
 
-	dprintk("RPC:       looking up machine cred\n");
+	dprintk("RPC:       looking up machine cred for service %s\n",
+			service_name);
 	return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0);
 }
 EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index afb56553dfe7..28d72d298735 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -392,7 +392,8 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
 }
 
 static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
-				struct rpc_clnt *clnt, int machine_cred)
+				struct rpc_clnt *clnt,
+				const char *service_name)
 {
 	struct gss_api_mech *mech = gss_msg->auth->mech;
 	char *p = gss_msg->databuf;
@@ -407,12 +408,8 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 		p += len;
 		gss_msg->msg.len += len;
 	}
-	if (machine_cred) {
-		len = sprintf(p, "service=* ");
-		p += len;
-		gss_msg->msg.len += len;
-	} else if (!strcmp(clnt->cl_program->name, "nfs4_cb")) {
-		len = sprintf(p, "service=nfs ");
+	if (service_name != NULL) {
+		len = sprintf(p, "service=%s ", service_name);
 		p += len;
 		gss_msg->msg.len += len;
 	}
@@ -429,17 +426,18 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 }
 
 static void gss_encode_msg(struct gss_upcall_msg *gss_msg,
-				struct rpc_clnt *clnt, int machine_cred)
+				struct rpc_clnt *clnt,
+				const char *service_name)
 {
 	if (pipe_version == 0)
 		gss_encode_v0_msg(gss_msg);
 	else /* pipe_version == 1 */
-		gss_encode_v1_msg(gss_msg, clnt, machine_cred);
+		gss_encode_v1_msg(gss_msg, clnt, service_name);
 }
 
-static inline struct gss_upcall_msg *
-gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid, struct rpc_clnt *clnt,
-		int machine_cred)
+static struct gss_upcall_msg *
+gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt,
+		uid_t uid, const char *service_name)
 {
 	struct gss_upcall_msg *gss_msg;
 	int vers;
@@ -459,7 +457,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid, struct rpc_clnt *clnt,
 	atomic_set(&gss_msg->count, 1);
 	gss_msg->uid = uid;
 	gss_msg->auth = gss_auth;
-	gss_encode_msg(gss_msg, clnt, machine_cred);
+	gss_encode_msg(gss_msg, clnt, service_name);
 	return gss_msg;
 }
 
@@ -471,7 +469,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr
 	struct gss_upcall_msg *gss_new, *gss_msg;
 	uid_t uid = cred->cr_uid;
 
-	gss_new = gss_alloc_msg(gss_auth, uid, clnt, gss_cred->gc_machine_cred);
+	gss_new = gss_alloc_msg(gss_auth, clnt, uid, gss_cred->gc_principal);
 	if (IS_ERR(gss_new))
 		return gss_new;
 	gss_msg = gss_add_msg(gss_new);
@@ -995,7 +993,9 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
 	 */
 	cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW;
 	cred->gc_service = gss_auth->service;
-	cred->gc_machine_cred = acred->machine_cred;
+	cred->gc_principal = NULL;
+	if (acred->machine_cred)
+		cred->gc_principal = acred->principal;
 	kref_get(&gss_auth->kref);
 	return &cred->gc_base;
 
@@ -1030,7 +1030,12 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
 	if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags))
 		return 0;
 out:
-	if (acred->machine_cred != gss_cred->gc_machine_cred)
+	if (acred->principal != NULL) {
+		if (gss_cred->gc_principal == NULL)
+			return 0;
+		return strcmp(acred->principal, gss_cred->gc_principal) == 0;
+	}
+	if (gss_cred->gc_principal != NULL)
 		return 0;
 	return rc->cr_uid == acred->uid;
 }
@@ -1104,7 +1109,8 @@ static int gss_renew_cred(struct rpc_task *task)
 	struct rpc_auth *auth = oldcred->cr_auth;
 	struct auth_cred acred = {
 		.uid = oldcred->cr_uid,
-		.machine_cred = gss_cred->gc_machine_cred,
+		.principal = gss_cred->gc_principal,
+		.machine_cred = (gss_cred->gc_principal != NULL ? 1 : 0),
 	};
 	struct rpc_cred *new;
 
-- 
cgit v1.2.3


From bf118a342f10dafe44b14451a1392c3254629a1f Mon Sep 17 00:00:00 2001
From: Andy Adamson <andros@netapp.com>
Date: Wed, 7 Dec 2011 11:55:27 -0500
Subject: NFSv4: include bitmap in nfsv4 get acl data

The NFSv4 bitmap size is unbounded: a server can return an arbitrary
sized bitmap in an FATTR4_WORD0_ACL request.  Replace using the
nfs4_fattr_bitmap_maxsz as a guess to the maximum bitmask returned by a server
with the inclusion of the bitmap (xdr length plus bitmasks) and the acl data
xdr length to the (cached) acl page data.

This is a general solution to commit e5012d1f "NFSv4.1: update
nfs4_fattr_bitmap_maxsz" and fixes hitting a BUG_ON in xdr_shrink_bufhead
when getting ACLs.

Fix a bug in decode_getacl that returned -EINVAL on ACLs > page when getxattr
was called with a NULL buffer, preventing ACL > PAGE_SIZE from being retrieved.

Cc: stable@kernel.org
Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c          | 96 +++++++++++++++++++++++++++-------------------
 fs/nfs/nfs4xdr.c           | 31 +++++++++++----
 include/linux/nfs_xdr.h    |  5 +++
 include/linux/sunrpc/xdr.h |  2 +
 net/sunrpc/xdr.c           |  3 +-
 5 files changed, 89 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index fcc2408d7ab0..3b1080118452 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3426,19 +3426,6 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server)
  */
 #define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT)
 
-static void buf_to_pages(const void *buf, size_t buflen,
-		struct page **pages, unsigned int *pgbase)
-{
-	const void *p = buf;
-
-	*pgbase = offset_in_page(buf);
-	p -= *pgbase;
-	while (p < buf + buflen) {
-		*(pages++) = virt_to_page(p);
-		p += PAGE_CACHE_SIZE;
-	}
-}
-
 static int buf_to_pages_noslab(const void *buf, size_t buflen,
 		struct page **pages, unsigned int *pgbase)
 {
@@ -3535,9 +3522,19 @@ out:
 	nfs4_set_cached_acl(inode, acl);
 }
 
+/*
+ * The getxattr API returns the required buffer length when called with a
+ * NULL buf. The NFSv4 acl tool then calls getxattr again after allocating
+ * the required buf.  On a NULL buf, we send a page of data to the server
+ * guessing that the ACL request can be serviced by a page. If so, we cache
+ * up to the page of ACL data, and the 2nd call to getxattr is serviced by
+ * the cache. If not so, we throw away the page, and cache the required
+ * length. The next getxattr call will then produce another round trip to
+ * the server, this time with the input buf of the required size.
+ */
 static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
 {
-	struct page *pages[NFS4ACL_MAXPAGES];
+	struct page *pages[NFS4ACL_MAXPAGES] = {NULL, };
 	struct nfs_getaclargs args = {
 		.fh = NFS_FH(inode),
 		.acl_pages = pages,
@@ -3552,41 +3549,60 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
-	struct page *localpage = NULL;
-	int ret;
+	int ret = -ENOMEM, npages, i, acl_len = 0;
 
-	if (buflen < PAGE_SIZE) {
-		/* As long as we're doing a round trip to the server anyway,
-		 * let's be prepared for a page of acl data. */
-		localpage = alloc_page(GFP_KERNEL);
-		resp_buf = page_address(localpage);
-		if (localpage == NULL)
-			return -ENOMEM;
-		args.acl_pages[0] = localpage;
-		args.acl_pgbase = 0;
-		args.acl_len = PAGE_SIZE;
-	} else {
-		resp_buf = buf;
-		buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
+	npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	/* As long as we're doing a round trip to the server anyway,
+	 * let's be prepared for a page of acl data. */
+	if (npages == 0)
+		npages = 1;
+
+	for (i = 0; i < npages; i++) {
+		pages[i] = alloc_page(GFP_KERNEL);
+		if (!pages[i])
+			goto out_free;
+	}
+	if (npages > 1) {
+		/* for decoding across pages */
+		args.acl_scratch = alloc_page(GFP_KERNEL);
+		if (!args.acl_scratch)
+			goto out_free;
 	}
-	ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
+	args.acl_len = npages * PAGE_SIZE;
+	args.acl_pgbase = 0;
+	/* Let decode_getacl know not to fail if the ACL data is larger than
+	 * the page we send as a guess */
+	if (buf == NULL)
+		res.acl_flags |= NFS4_ACL_LEN_REQUEST;
+	resp_buf = page_address(pages[0]);
+
+	dprintk("%s  buf %p buflen %ld npages %d args.acl_len %ld\n",
+		__func__, buf, buflen, npages, args.acl_len);
+	ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
+			     &msg, &args.seq_args, &res.seq_res, 0);
 	if (ret)
 		goto out_free;
-	if (res.acl_len > args.acl_len)
-		nfs4_write_cached_acl(inode, NULL, res.acl_len);
+
+	acl_len = res.acl_len - res.acl_data_offset;
+	if (acl_len > args.acl_len)
+		nfs4_write_cached_acl(inode, NULL, acl_len);
 	else
-		nfs4_write_cached_acl(inode, resp_buf, res.acl_len);
+		nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset,
+				      acl_len);
 	if (buf) {
 		ret = -ERANGE;
-		if (res.acl_len > buflen)
+		if (acl_len > buflen)
 			goto out_free;
-		if (localpage)
-			memcpy(buf, resp_buf, res.acl_len);
+		_copy_from_pages(buf, pages, res.acl_data_offset,
+				res.acl_len);
 	}
-	ret = res.acl_len;
+	ret = acl_len;
 out_free:
-	if (localpage)
-		__free_page(localpage);
+	for (i = 0; i < npages; i++)
+		if (pages[i])
+			__free_page(pages[i]);
+	if (args.acl_scratch)
+		__free_page(args.acl_scratch);
 	return ret;
 }
 
@@ -3617,6 +3633,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
 		nfs_zap_acl_cache(inode);
 	ret = nfs4_read_cached_acl(inode, buf, buflen);
 	if (ret != -ENOENT)
+		/* -ENOENT is returned if there is no ACL or if there is an ACL
+		 * but no cached acl data, just the acl length */
 		return ret;
 	return nfs4_get_acl_uncached(inode, buf, buflen);
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e6161b213ed1..dcaf69309d8e 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2517,11 +2517,13 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
-	replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1;
+	replen = hdr.replen + op_decode_hdr_maxsz + 1;
 	encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
 		args->acl_pages, args->acl_pgbase, args->acl_len);
+	xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE);
+
 	encode_nops(&hdr);
 }
 
@@ -4957,17 +4959,18 @@ decode_restorefh(struct xdr_stream *xdr)
 }
 
 static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
-		size_t *acl_len)
+			 struct nfs_getaclres *res)
 {
-	__be32 *savep;
+	__be32 *savep, *bm_p;
 	uint32_t attrlen,
 		 bitmap[3] = {0};
 	struct kvec *iov = req->rq_rcv_buf.head;
 	int status;
 
-	*acl_len = 0;
+	res->acl_len = 0;
 	if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
 		goto out;
+	bm_p = xdr->p;
 	if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
 		goto out;
 	if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
@@ -4979,18 +4982,30 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
 		size_t hdrlen;
 		u32 recvd;
 
+		/* The bitmap (xdr len + bitmaps) and the attr xdr len words
+		 * are stored with the acl data to handle the problem of
+		 * variable length bitmaps.*/
+		xdr->p = bm_p;
+		res->acl_data_offset = be32_to_cpup(bm_p) + 2;
+		res->acl_data_offset <<= 2;
+
 		/* We ignore &savep and don't do consistency checks on
 		 * the attr length.  Let userspace figure it out.... */
 		hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
+		attrlen += res->acl_data_offset;
 		recvd = req->rq_rcv_buf.len - hdrlen;
 		if (attrlen > recvd) {
-			dprintk("NFS: server cheating in getattr"
-					" acl reply: attrlen %u > recvd %u\n",
+			if (res->acl_flags & NFS4_ACL_LEN_REQUEST) {
+				/* getxattr interface called with a NULL buf */
+				res->acl_len = attrlen;
+				goto out;
+			}
+			dprintk("NFS: acl reply: attrlen %u > recvd %u\n",
 					attrlen, recvd);
 			return -EINVAL;
 		}
 		xdr_read_pages(xdr, attrlen);
-		*acl_len = attrlen;
+		res->acl_len = attrlen;
 	} else
 		status = -EOPNOTSUPP;
 
@@ -6028,7 +6043,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_getacl(xdr, rqstp, &res->acl_len);
+	status = decode_getacl(xdr, rqstp, res);
 
 out:
 	return status;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2a7c533be5dd..6c898afe6095 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -602,11 +602,16 @@ struct nfs_getaclargs {
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
+	struct page *			acl_scratch;
 	struct nfs4_sequence_args 	seq_args;
 };
 
+/* getxattr ACL interface flags */
+#define NFS4_ACL_LEN_REQUEST	0x0001	/* zero length getxattr buffer */
 struct nfs_getaclres {
 	size_t				acl_len;
+	size_t				acl_data_offset;
+	int				acl_flags;
 	struct nfs4_sequence_res	seq_res;
 };
 
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index a20970ef9e4e..af70af333546 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -191,6 +191,8 @@ extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
 			     struct xdr_array2_desc *desc);
 extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
 			     struct xdr_array2_desc *desc);
+extern void _copy_from_pages(char *p, struct page **pages, size_t pgbase,
+			     size_t len);
 
 /*
  * Provide some simple tools for XDR buffer overflow-checking etc.
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 277ebd4bf095..593f4c605305 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -296,7 +296,7 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len)
  * Copies data into an arbitrary memory location from an array of pages
  * The copy is assumed to be non-overlapping.
  */
-static void
+void
 _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
 {
 	struct page **pgfrom;
@@ -324,6 +324,7 @@ _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len)
 
 	} while ((len -= copy) != 0);
 }
+EXPORT_SYMBOL_GPL(_copy_from_pages);
 
 /*
  * xdr_shrink_bufhead
-- 
cgit v1.2.3


From 0aaaf5c424c7ffd6b0c4253251356558b16ef3a2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 6 Dec 2011 16:13:48 -0500
Subject: NFS: Cache state owners after files are closed

Servers have a finite amount of memory to store NFSv4 open and lock
owners.  Moreover, servers may have a difficult time determining when
they can reap their state owner table, thanks to gray areas in the
NFSv4 protocol specification.  Thus clients should be careful to reuse
state owners when possible.

Currently Linux is not too careful.  When a user has closed all her
files on one mount point, the state owner's reference count goes to
zero, and it is released.  The next OPEN allocates a new one.  A
workload that serially opens and closes files can run through a large
number of open owners this way.

When a state owner's reference count goes to zero, slap it onto a free
list for that nfs_server, with an expiry time.  Garbage collect before
looking for a state owner.  This makes state owners for active users
available for re-use.

Now that there can be unused state owners remaining at umount time,
purge the state owner free list when a server is destroyed.  Also be
sure not to reclaim unused state owners during state recovery.

This change has benefits for the client as well.  For some workloads,
this approach drops the number of OPEN_CONFIRM calls from the same as
the number of OPEN calls, down to just one.  This reduces wire traffic
and thus open(2) latency.  Before this patch, untarring a kernel
source tarball shows the OPEN_CONFIRM call counter steadily increasing
through the test.  With the patch, the OPEN_CONFIRM count remains at 1
throughout the entire untar.

As long as the expiry time is kept short, I don't think garbage
collection should be terribly expensive, although it does bounce the
clp->cl_lock around a bit.

[ At some point we should rationalize the use of the nfs_server
->destroy method. ]

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
[Trond: Fixed a garbage collection race and a few efficiency issues]
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           |  8 +++++
 fs/nfs/nfs4_fs.h          |  3 ++
 fs/nfs/nfs4state.c        | 89 ++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/nfs_fs_sb.h |  1 +
 4 files changed, 92 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 32ea37198e93..41bd67f80d31 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -250,6 +250,11 @@ static void pnfs_init_server(struct nfs_server *server)
 	rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
 }
 
+static void nfs4_destroy_server(struct nfs_server *server)
+{
+	nfs4_purge_state_owners(server);
+}
+
 #else
 static void nfs4_shutdown_client(struct nfs_client *clp)
 {
@@ -1065,6 +1070,7 @@ static struct nfs_server *nfs_alloc_server(void)
 	INIT_LIST_HEAD(&server->master_link);
 	INIT_LIST_HEAD(&server->delegations);
 	INIT_LIST_HEAD(&server->layouts);
+	INIT_LIST_HEAD(&server->state_owners_lru);
 
 	atomic_set(&server->active, 0);
 
@@ -1538,6 +1544,7 @@ static int nfs4_server_common_setup(struct nfs_server *server,
 
 	nfs_server_insert_lists(server);
 	server->mount_time = jiffies;
+	server->destroy = nfs4_destroy_server;
 out:
 	nfs_free_fattr(fattr);
 	return error;
@@ -1719,6 +1726,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 
 	/* Copy data from the source */
 	server->nfs_client = source->nfs_client;
+	server->destroy = source->destroy;
 	atomic_inc(&server->nfs_client->cl_count);
 	nfs_server_copy_userdata(server, source);
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 693ae22f8731..4d7d0aedc101 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -94,6 +94,8 @@ struct nfs_unique_id {
 struct nfs4_state_owner {
 	struct nfs_unique_id so_owner_id;
 	struct nfs_server    *so_server;
+	struct list_head     so_lru;
+	unsigned long        so_expires;
 	struct rb_node	     so_server_node;
 
 	struct rpc_cred	     *so_cred;	 /* Associated cred */
@@ -319,6 +321,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
 
 extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
+extern void nfs4_purge_state_owners(struct nfs_server *);
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, fmode_t);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6354e4fcc829..a53f33b4ac3a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -49,6 +49,7 @@
 #include <linux/ratelimit.h>
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
+#include <linux/jiffies.h>
 
 #include "nfs4_fs.h"
 #include "callback.h"
@@ -388,6 +389,8 @@ nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
 		else if (cred > sp->so_cred)
 			p = &parent->rb_right;
 		else {
+			if (!list_empty(&sp->so_lru))
+				list_del_init(&sp->so_lru);
 			atomic_inc(&sp->so_count);
 			return sp;
 		}
@@ -412,6 +415,8 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
 		else if (new->so_cred > sp->so_cred)
 			p = &parent->rb_right;
 		else {
+			if (!list_empty(&sp->so_lru))
+				list_del_init(&sp->so_lru);
 			atomic_inc(&sp->so_count);
 			return sp;
 		}
@@ -453,6 +458,7 @@ nfs4_alloc_state_owner(void)
 	spin_lock_init(&sp->so_sequence.lock);
 	INIT_LIST_HEAD(&sp->so_sequence.list);
 	atomic_set(&sp->so_count, 1);
+	INIT_LIST_HEAD(&sp->so_lru);
 	return sp;
 }
 
@@ -470,6 +476,38 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 	}
 }
 
+static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
+{
+	rpc_destroy_wait_queue(&sp->so_sequence.wait);
+	put_rpccred(sp->so_cred);
+	kfree(sp);
+}
+
+static void nfs4_gc_state_owners(struct nfs_server *server)
+{
+	struct nfs_client *clp = server->nfs_client;
+	struct nfs4_state_owner *sp, *tmp;
+	unsigned long time_min, time_max;
+	LIST_HEAD(doomed);
+
+	spin_lock(&clp->cl_lock);
+	time_max = jiffies;
+	time_min = (long)time_max - (long)clp->cl_lease_time;
+	list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
+		/* NB: LRU is sorted so that oldest is at the head */
+		if (time_in_range(sp->so_expires, time_min, time_max))
+			break;
+		list_move(&sp->so_lru, &doomed);
+		nfs4_remove_state_owner_locked(sp);
+	}
+	spin_unlock(&clp->cl_lock);
+
+	list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
+		list_del(&sp->so_lru);
+		nfs4_free_state_owner(sp);
+	}
+}
+
 /**
  * nfs4_get_state_owner - Look up a state owner given a credential
  * @server: nfs_server to search
@@ -487,10 +525,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
 	sp = nfs4_find_state_owner_locked(server, cred);
 	spin_unlock(&clp->cl_lock);
 	if (sp != NULL)
-		return sp;
+		goto out;
 	new = nfs4_alloc_state_owner();
 	if (new == NULL)
-		return NULL;
+		goto out;
 	new->so_server = server;
 	new->so_cred = cred;
 	spin_lock(&clp->cl_lock);
@@ -502,26 +540,58 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
 		rpc_destroy_wait_queue(&new->so_sequence.wait);
 		kfree(new);
 	}
+out:
+	nfs4_gc_state_owners(server);
 	return sp;
 }
 
 /**
  * nfs4_put_state_owner - Release a nfs4_state_owner
  * @sp: state owner data to release
- *
  */
 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 {
-	struct nfs_client *clp = sp->so_server->nfs_client;
-	struct rpc_cred *cred = sp->so_cred;
+	struct nfs_server *server = sp->so_server;
+	struct nfs_client *clp = server->nfs_client;
 
 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
 		return;
-	nfs4_remove_state_owner_locked(sp);
+
+	if (!RB_EMPTY_NODE(&sp->so_server_node)) {
+		sp->so_expires = jiffies;
+		list_add_tail(&sp->so_lru, &server->state_owners_lru);
+		spin_unlock(&clp->cl_lock);
+	} else {
+		nfs4_remove_state_owner_locked(sp);
+		spin_unlock(&clp->cl_lock);
+		nfs4_free_state_owner(sp);
+	}
+}
+
+/**
+ * nfs4_purge_state_owners - Release all cached state owners
+ * @server: nfs_server with cached state owners to release
+ *
+ * Called at umount time.  Remaining state owners will be on
+ * the LRU with ref count of zero.
+ */
+void nfs4_purge_state_owners(struct nfs_server *server)
+{
+	struct nfs_client *clp = server->nfs_client;
+	struct nfs4_state_owner *sp, *tmp;
+	LIST_HEAD(doomed);
+
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
+		list_move(&sp->so_lru, &doomed);
+		nfs4_remove_state_owner_locked(sp);
+	}
 	spin_unlock(&clp->cl_lock);
-	rpc_destroy_wait_queue(&sp->so_sequence.wait);
-	put_rpccred(cred);
-	kfree(sp);
+
+	list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
+		list_del(&sp->so_lru);
+		nfs4_free_state_owner(sp);
+	}
 }
 
 static struct nfs4_state *
@@ -1393,6 +1463,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
 restart:
 	rcu_read_lock();
 	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		nfs4_purge_state_owners(server);
 		spin_lock(&clp->cl_lock);
 		for (pos = rb_first(&server->state_owners);
 		     pos != NULL;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b5479df8378d..ba4d7656ecfd 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -153,6 +153,7 @@ struct nfs_server {
 	struct rb_root		openowner_id;
 	struct rb_root		lockowner_id;
 #endif
+	struct list_head	state_owners_lru;
 	struct list_head	layouts;
 	struct list_head	delegations;
 	void (*destroy)(struct nfs_server *);
-- 
cgit v1.2.3


From 6a9de49115d5ff9871d953af1a5c8249e1585731 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:14 -0500
Subject: capabilities: remove the task from capable LSM hook entirely

The capabilities framework is based around credentials, not necessarily the
current task.  Yet we still passed the current task down into LSMs from the
security_capable() LSM hook as if it was a meaningful portion of the security
decision.  This patch removes the 'generic' passing of current and instead
forces individual LSMs to use current explicitly if they think it is
appropriate.  In our case those LSMs are SELinux and AppArmor.

I believe the AppArmor use of current is incorrect, but that is wholly
unrelated to this patch.  This patch does not change what AppArmor does, it
just makes it clear in the AppArmor code that it is doing it.

The SELinux code still uses current in its audit message, which may also be
wrong and needs further investigation.  Again this is NOT a change, it may
have always been wrong, this patch just makes it clear what is happening.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/security.h | 16 +++++++---------
 security/apparmor/lsm.c  |  8 ++++----
 security/commoncap.c     | 16 +++++++---------
 security/security.c      |  7 +++----
 security/selinux/hooks.c | 23 ++++++++++-------------
 5 files changed, 31 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index ebd2a53a3d07..4921163b2752 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -53,8 +53,8 @@ struct user_namespace;
  * These functions are in security/capability.c and are used
  * as the default capabilities functions
  */
-extern int cap_capable(struct task_struct *tsk, const struct cred *cred,
-		       struct user_namespace *ns, int cap, int audit);
+extern int cap_capable(const struct cred *cred, struct user_namespace *ns,
+		       int cap, int audit);
 extern int cap_settime(const struct timespec *ts, const struct timezone *tz);
 extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode);
 extern int cap_ptrace_traceme(struct task_struct *parent);
@@ -1261,7 +1261,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  * @capable:
  *	Check whether the @tsk process has the @cap capability in the indicated
  *	credentials.
- *	@tsk contains the task_struct for the process.
  *	@cred contains the credentials to use.
  *      @ns contains the user namespace we want the capability in
  *	@cap contains the capability <include/linux/capability.h>.
@@ -1385,8 +1384,8 @@ struct security_operations {
 		       const kernel_cap_t *effective,
 		       const kernel_cap_t *inheritable,
 		       const kernel_cap_t *permitted);
-	int (*capable) (struct task_struct *tsk, const struct cred *cred,
-			struct user_namespace *ns, int cap, int audit);
+	int (*capable) (const struct cred *cred, struct user_namespace *ns,
+			int cap, int audit);
 	int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
 	int (*quota_on) (struct dentry *dentry);
 	int (*syslog) (int type);
@@ -1867,7 +1866,7 @@ static inline int security_capset(struct cred *new,
 static inline int security_capable(struct user_namespace *ns,
 				   const struct cred *cred, int cap)
 {
-	return cap_capable(current, cred, ns, cap, SECURITY_CAP_AUDIT);
+	return cap_capable(cred, ns, cap, SECURITY_CAP_AUDIT);
 }
 
 static inline int security_real_capable(struct task_struct *tsk, struct user_namespace *ns, int cap)
@@ -1875,7 +1874,7 @@ static inline int security_real_capable(struct task_struct *tsk, struct user_nam
 	int ret;
 
 	rcu_read_lock();
-	ret = cap_capable(tsk, __task_cred(tsk), ns, cap, SECURITY_CAP_AUDIT);
+	ret = cap_capable(__task_cred(tsk), ns, cap, SECURITY_CAP_AUDIT);
 	rcu_read_unlock();
 	return ret;
 }
@@ -1886,8 +1885,7 @@ int security_real_capable_noaudit(struct task_struct *tsk, struct user_namespace
 	int ret;
 
 	rcu_read_lock();
-	ret = cap_capable(tsk, __task_cred(tsk), ns, cap,
-			       SECURITY_CAP_NOAUDIT);
+	ret = cap_capable(__task_cred(tsk), ns, cap, SECURITY_CAP_NOAUDIT);
 	rcu_read_unlock();
 	return ret;
 }
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 37832026e58a..ef4e2a8a33a5 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -136,16 +136,16 @@ static int apparmor_capget(struct task_struct *target, kernel_cap_t *effective,
 	return 0;
 }
 
-static int apparmor_capable(struct task_struct *task, const struct cred *cred,
-			    struct user_namespace *ns, int cap, int audit)
+static int apparmor_capable(const struct cred *cred, struct user_namespace *ns,
+			    int cap, int audit)
 {
 	struct aa_profile *profile;
 	/* cap_capable returns 0 on success, else -EPERM */
-	int error = cap_capable(task, cred, ns, cap, audit);
+	int error = cap_capable(cred, ns, cap, audit);
 	if (!error) {
 		profile = aa_cred_profile(cred);
 		if (!unconfined(profile))
-			error = aa_capable(task, profile, cap, audit);
+			error = aa_capable(current, profile, cap, audit);
 	}
 	return error;
 }
diff --git a/security/commoncap.c b/security/commoncap.c
index a93b3b733079..89f02ff66af9 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -66,7 +66,6 @@ EXPORT_SYMBOL(cap_netlink_recv);
 
 /**
  * cap_capable - Determine whether a task has a particular effective capability
- * @tsk: The task to query
  * @cred: The credentials to use
  * @ns:  The user namespace in which we need the capability
  * @cap: The capability to check for
@@ -80,8 +79,8 @@ EXPORT_SYMBOL(cap_netlink_recv);
  * cap_has_capability() returns 0 when a task has a capability, but the
  * kernel's capable() and has_capability() returns 1 for this case.
  */
-int cap_capable(struct task_struct *tsk, const struct cred *cred,
-		struct user_namespace *targ_ns, int cap, int audit)
+int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
+		int cap, int audit)
 {
 	for (;;) {
 		/* The creator of the user namespace has all caps. */
@@ -222,9 +221,8 @@ static inline int cap_inh_is_capped(void)
 	/* they are so limited unless the current task has the CAP_SETPCAP
 	 * capability
 	 */
-	if (cap_capable(current, current_cred(),
-			current_cred()->user->user_ns, CAP_SETPCAP,
-			SECURITY_CAP_AUDIT) == 0)
+	if (cap_capable(current_cred(), current_cred()->user->user_ns,
+			CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0)
 		return 0;
 	return 1;
 }
@@ -870,7 +868,7 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		     & (new->securebits ^ arg2))			/*[1]*/
 		    || ((new->securebits & SECURE_ALL_LOCKS & ~arg2))	/*[2]*/
 		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
-		    || (cap_capable(current, current_cred(),
+		    || (cap_capable(current_cred(),
 				    current_cred()->user->user_ns, CAP_SETPCAP,
 				    SECURITY_CAP_AUDIT) != 0)		/*[4]*/
 			/*
@@ -936,7 +934,7 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages)
 {
 	int cap_sys_admin = 0;
 
-	if (cap_capable(current, current_cred(), &init_user_ns, CAP_SYS_ADMIN,
+	if (cap_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN,
 			SECURITY_CAP_NOAUDIT) == 0)
 		cap_sys_admin = 1;
 	return __vm_enough_memory(mm, pages, cap_sys_admin);
@@ -963,7 +961,7 @@ int cap_file_mmap(struct file *file, unsigned long reqprot,
 	int ret = 0;
 
 	if (addr < dac_mmap_min_addr) {
-		ret = cap_capable(current, current_cred(), &init_user_ns, CAP_SYS_RAWIO,
+		ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
 				  SECURITY_CAP_AUDIT);
 		/* set PF_SUPERPRIV if it turns out we allow the low mmap */
 		if (ret == 0)
diff --git a/security/security.c b/security/security.c
index d9e153390926..9ae68c64455e 100644
--- a/security/security.c
+++ b/security/security.c
@@ -157,8 +157,7 @@ int security_capset(struct cred *new, const struct cred *old,
 int security_capable(struct user_namespace *ns, const struct cred *cred,
 		     int cap)
 {
-	return security_ops->capable(current, cred, ns, cap,
-				     SECURITY_CAP_AUDIT);
+	return security_ops->capable(cred, ns, cap, SECURITY_CAP_AUDIT);
 }
 
 int security_real_capable(struct task_struct *tsk, struct user_namespace *ns,
@@ -168,7 +167,7 @@ int security_real_capable(struct task_struct *tsk, struct user_namespace *ns,
 	int ret;
 
 	cred = get_task_cred(tsk);
-	ret = security_ops->capable(tsk, cred, ns, cap, SECURITY_CAP_AUDIT);
+	ret = security_ops->capable(cred, ns, cap, SECURITY_CAP_AUDIT);
 	put_cred(cred);
 	return ret;
 }
@@ -180,7 +179,7 @@ int security_real_capable_noaudit(struct task_struct *tsk,
 	int ret;
 
 	cred = get_task_cred(tsk);
-	ret = security_ops->capable(tsk, cred, ns, cap, SECURITY_CAP_NOAUDIT);
+	ret = security_ops->capable(cred, ns, cap, SECURITY_CAP_NOAUDIT);
 	put_cred(cred);
 	return ret;
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index e545b9f67072..c9605c4a2e08 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1414,8 +1414,7 @@ static int current_has_perm(const struct task_struct *tsk,
 #endif
 
 /* Check whether a task is allowed to use a capability. */
-static int task_has_capability(struct task_struct *tsk,
-			       const struct cred *cred,
+static int cred_has_capability(const struct cred *cred,
 			       int cap, int audit)
 {
 	struct common_audit_data ad;
@@ -1426,7 +1425,7 @@ static int task_has_capability(struct task_struct *tsk,
 	int rc;
 
 	COMMON_AUDIT_DATA_INIT(&ad, CAP);
-	ad.tsk = tsk;
+	ad.tsk = current;
 	ad.u.cap = cap;
 
 	switch (CAP_TO_INDEX(cap)) {
@@ -1867,16 +1866,16 @@ static int selinux_capset(struct cred *new, const struct cred *old,
  * the CAP_SETUID and CAP_SETGID capabilities using the capable hook.
  */
 
-static int selinux_capable(struct task_struct *tsk, const struct cred *cred,
-			   struct user_namespace *ns, int cap, int audit)
+static int selinux_capable(const struct cred *cred, struct user_namespace *ns,
+			   int cap, int audit)
 {
 	int rc;
 
-	rc = cap_capable(tsk, cred, ns, cap, audit);
+	rc = cap_capable(cred, ns, cap, audit);
 	if (rc)
 		return rc;
 
-	return task_has_capability(tsk, cred, cap, audit);
+	return cred_has_capability(cred, cap, audit);
 }
 
 static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb)
@@ -1953,8 +1952,7 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages)
 {
 	int rc, cap_sys_admin = 0;
 
-	rc = selinux_capable(current, current_cred(),
-			     &init_user_ns, CAP_SYS_ADMIN,
+	rc = selinux_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN,
 			     SECURITY_CAP_NOAUDIT);
 	if (rc == 0)
 		cap_sys_admin = 1;
@@ -2858,8 +2856,7 @@ static int selinux_inode_getsecurity(const struct inode *inode, const char *name
 	 * and lack of permission just means that we fall back to the
 	 * in-core context value, not a denial.
 	 */
-	error = selinux_capable(current, current_cred(),
-				&init_user_ns, CAP_MAC_ADMIN,
+	error = selinux_capable(current_cred(), &init_user_ns, CAP_MAC_ADMIN,
 				SECURITY_CAP_NOAUDIT);
 	if (!error)
 		error = security_sid_to_context_force(isec->sid, &context,
@@ -2992,8 +2989,8 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd,
 
 	case KDSKBENT:
 	case KDSKBSENT:
-		error = task_has_capability(current, cred, CAP_SYS_TTY_CONFIG,
-					SECURITY_CAP_AUDIT);
+		error = cred_has_capability(cred, CAP_SYS_TTY_CONFIG,
+					    SECURITY_CAP_AUDIT);
 		break;
 
 	/* default case assumes that the command will go
-- 
cgit v1.2.3


From b7e724d303b684655e4ca3dabd5a6840ad19012d Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:15 -0500
Subject: capabilities: reverse arguments to security_capable

security_capable takes ns, cred, cap.  But the LSM capable() hook takes
cred, ns, cap.  The capability helper functions also take cred, ns, cap.
Rather than flip the argument order just to flip it back, leave them alone.
Heck, this should be a little faster since the arguments will already be in
the right place!

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 drivers/pci/pci-sysfs.c  | 2 +-
 include/linux/security.h | 6 +++---
 kernel/capability.c      | 2 +-
 security/security.c      | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 7bcf12adced7..a4457ab61342 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -431,7 +431,7 @@ pci_read_config(struct file *filp, struct kobject *kobj,
 	u8 *data = (u8*) buf;
 
 	/* Several chips lock up trying to read undefined config space */
-	if (security_capable(&init_user_ns, filp->f_cred, CAP_SYS_ADMIN) == 0) {
+	if (security_capable(filp->f_cred, &init_user_ns, CAP_SYS_ADMIN) == 0) {
 		size = dev->cfg_size;
 	} else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
 		size = 128;
diff --git a/include/linux/security.h b/include/linux/security.h
index 4921163b2752..ee969ff40a26 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1666,7 +1666,7 @@ int security_capset(struct cred *new, const struct cred *old,
 		    const kernel_cap_t *effective,
 		    const kernel_cap_t *inheritable,
 		    const kernel_cap_t *permitted);
-int security_capable(struct user_namespace *ns, const struct cred *cred,
+int security_capable(const struct cred *cred, struct user_namespace *ns,
 			int cap);
 int security_real_capable(struct task_struct *tsk, struct user_namespace *ns,
 			int cap);
@@ -1863,8 +1863,8 @@ static inline int security_capset(struct cred *new,
 	return cap_capset(new, old, effective, inheritable, permitted);
 }
 
-static inline int security_capable(struct user_namespace *ns,
-				   const struct cred *cred, int cap)
+static inline int security_capable(const struct cred *cred,
+				   struct user_namespace *ns, int cap)
 {
 	return cap_capable(cred, ns, cap, SECURITY_CAP_AUDIT);
 }
diff --git a/kernel/capability.c b/kernel/capability.c
index 283c529f8b1c..d98392719adb 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -374,7 +374,7 @@ bool ns_capable(struct user_namespace *ns, int cap)
 		BUG();
 	}
 
-	if (security_capable(ns, current_cred(), cap) == 0) {
+	if (security_capable(current_cred(), ns, cap) == 0) {
 		current->flags |= PF_SUPERPRIV;
 		return true;
 	}
diff --git a/security/security.c b/security/security.c
index 9ae68c64455e..b9e57f4fc44a 100644
--- a/security/security.c
+++ b/security/security.c
@@ -154,7 +154,7 @@ int security_capset(struct cred *new, const struct cred *old,
 				    effective, inheritable, permitted);
 }
 
-int security_capable(struct user_namespace *ns, const struct cred *cred,
+int security_capable(const struct cred *cred, struct user_namespace *ns,
 		     int cap)
 {
 	return security_ops->capable(cred, ns, cap, SECURITY_CAP_AUDIT);
-- 
cgit v1.2.3


From c7eba4a97563fd8b431787f7ad623444f2da80c6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:15 -0500
Subject: capabilities: introduce security_capable_noaudit

Exactly like security_capable except don't audit any denials.  This is for
places where the kernel may make decisions about what to do if a task has a
given capability, but where failing that capability check is not a sign of
a security policy violation.  An example is checking if a task has
CAP_SYS_ADMIN to lower its likelihood of being killed by the oom killer.
This check is not a security violation if it is denied.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
---
 include/linux/security.h | 7 +++++++
 security/security.c      | 6 ++++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index ee969ff40a26..caff54eee686 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1668,6 +1668,8 @@ int security_capset(struct cred *new, const struct cred *old,
 		    const kernel_cap_t *permitted);
 int security_capable(const struct cred *cred, struct user_namespace *ns,
 			int cap);
+int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns,
+			     int cap);
 int security_real_capable(struct task_struct *tsk, struct user_namespace *ns,
 			int cap);
 int security_real_capable_noaudit(struct task_struct *tsk,
@@ -1869,6 +1871,11 @@ static inline int security_capable(const struct cred *cred,
 	return cap_capable(cred, ns, cap, SECURITY_CAP_AUDIT);
 }
 
+static inline int security_capable_noaudit(const struct cred *cred,
+					   struct user_namespace *ns, int cap) {
+	return cap_capable(cred, ns, cap, SECURITY_CAP_NOAUDIT);
+}
+
 static inline int security_real_capable(struct task_struct *tsk, struct user_namespace *ns, int cap)
 {
 	int ret;
diff --git a/security/security.c b/security/security.c
index b9e57f4fc44a..b7edaae77d1d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -160,6 +160,12 @@ int security_capable(const struct cred *cred, struct user_namespace *ns,
 	return security_ops->capable(cred, ns, cap, SECURITY_CAP_AUDIT);
 }
 
+int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns,
+			     int cap)
+{
+	return security_ops->capable(cred, ns, cap, SECURITY_CAP_NOAUDIT);
+}
+
 int security_real_capable(struct task_struct *tsk, struct user_namespace *ns,
 			  int cap)
 {
-- 
cgit v1.2.3


From 2920a8409de5a51575d03deca07e5bb2be6fc98d Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:15 -0500
Subject: capabilities: remove all _real_ interfaces

The names security_real_capable and security_real_capable_noaudit just don't
make much sense to me.  Convert their callers to use security_capable and
security_capable_noaudit.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
---
 include/linux/security.h | 25 -------------------------
 kernel/capability.c      | 18 +++++++++++++++---
 security/security.c      | 24 ------------------------
 3 files changed, 15 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index caff54eee686..e345a9313a60 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1670,10 +1670,6 @@ int security_capable(const struct cred *cred, struct user_namespace *ns,
 			int cap);
 int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns,
 			     int cap);
-int security_real_capable(struct task_struct *tsk, struct user_namespace *ns,
-			int cap);
-int security_real_capable_noaudit(struct task_struct *tsk,
-			struct user_namespace *ns, int cap);
 int security_quotactl(int cmds, int type, int id, struct super_block *sb);
 int security_quota_on(struct dentry *dentry);
 int security_syslog(int type);
@@ -1876,27 +1872,6 @@ static inline int security_capable_noaudit(const struct cred *cred,
 	return cap_capable(cred, ns, cap, SECURITY_CAP_NOAUDIT);
 }
 
-static inline int security_real_capable(struct task_struct *tsk, struct user_namespace *ns, int cap)
-{
-	int ret;
-
-	rcu_read_lock();
-	ret = cap_capable(__task_cred(tsk), ns, cap, SECURITY_CAP_AUDIT);
-	rcu_read_unlock();
-	return ret;
-}
-
-static inline
-int security_real_capable_noaudit(struct task_struct *tsk, struct user_namespace *ns, int cap)
-{
-	int ret;
-
-	rcu_read_lock();
-	ret = cap_capable(__task_cred(tsk), ns, cap, SECURITY_CAP_NOAUDIT);
-	rcu_read_unlock();
-	return ret;
-}
-
 static inline int security_quotactl(int cmds, int type, int id,
 				     struct super_block *sb)
 {
diff --git a/kernel/capability.c b/kernel/capability.c
index d98392719adb..ff50ab62cfca 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -298,7 +298,11 @@ error:
  */
 bool has_capability(struct task_struct *t, int cap)
 {
-	int ret = security_real_capable(t, &init_user_ns, cap);
+	int ret;
+
+	rcu_read_lock();
+	ret = security_capable(__task_cred(t), &init_user_ns, cap);
+	rcu_read_unlock();
 
 	return (ret == 0);
 }
@@ -317,7 +321,11 @@ bool has_capability(struct task_struct *t, int cap)
 bool has_ns_capability(struct task_struct *t,
 		       struct user_namespace *ns, int cap)
 {
-	int ret = security_real_capable(t, ns, cap);
+	int ret;
+
+	rcu_read_lock();
+	ret = security_capable(__task_cred(t), ns, cap);
+	rcu_read_unlock();
 
 	return (ret == 0);
 }
@@ -335,7 +343,11 @@ bool has_ns_capability(struct task_struct *t,
  */
 bool has_capability_noaudit(struct task_struct *t, int cap)
 {
-	int ret = security_real_capable_noaudit(t, &init_user_ns, cap);
+	int ret;
+
+	rcu_read_lock();
+	ret = security_capable_noaudit(__task_cred(t), &init_user_ns, cap);
+	rcu_read_unlock();
 
 	return (ret == 0);
 }
diff --git a/security/security.c b/security/security.c
index b7edaae77d1d..8900c5c4db5c 100644
--- a/security/security.c
+++ b/security/security.c
@@ -166,30 +166,6 @@ int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns,
 	return security_ops->capable(cred, ns, cap, SECURITY_CAP_NOAUDIT);
 }
 
-int security_real_capable(struct task_struct *tsk, struct user_namespace *ns,
-			  int cap)
-{
-	const struct cred *cred;
-	int ret;
-
-	cred = get_task_cred(tsk);
-	ret = security_ops->capable(cred, ns, cap, SECURITY_CAP_AUDIT);
-	put_cred(cred);
-	return ret;
-}
-
-int security_real_capable_noaudit(struct task_struct *tsk,
-				  struct user_namespace *ns, int cap)
-{
-	const struct cred *cred;
-	int ret;
-
-	cred = get_task_cred(tsk);
-	ret = security_ops->capable(cred, ns, cap, SECURITY_CAP_NOAUDIT);
-	put_cred(cred);
-	return ret;
-}
-
 int security_quotactl(int cmds, int type, int id, struct super_block *sb)
 {
 	return security_ops->quotactl(cmds, type, id, sb);
-- 
cgit v1.2.3


From 7b61d648499e74dbec3d4ce645675e0ae040ae78 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:15 -0500
Subject: capabilites: introduce new has_ns_capabilities_noaudit

For consistency in interfaces, introduce a new interface called
has_ns_capability_noaudit.  It checks if the given task has the given
capability in the given namespace.  Use this new function in
has_capability_noaudit.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
---
 include/linux/capability.h |  2 ++
 kernel/capability.c        | 30 +++++++++++++++++++++++++-----
 2 files changed, 27 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index c42112350003..63f59fa8769d 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -543,6 +543,8 @@ extern bool has_capability(struct task_struct *t, int cap);
 extern bool has_ns_capability(struct task_struct *t,
 			      struct user_namespace *ns, int cap);
 extern bool has_capability_noaudit(struct task_struct *t, int cap);
+extern bool has_ns_capability_noaudit(struct task_struct *t,
+				      struct user_namespace *ns, int cap);
 extern bool capable(int cap);
 extern bool ns_capable(struct user_namespace *ns, int cap);
 extern bool task_ns_capable(struct task_struct *t, int cap);
diff --git a/kernel/capability.c b/kernel/capability.c
index fb815d1b9ea2..d8398e962470 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -325,27 +325,47 @@ bool has_capability(struct task_struct *t, int cap)
 }
 
 /**
- * has_capability_noaudit - Does a task have a capability (unaudited)
+ * has_ns_capability_noaudit - Does a task have a capability (unaudited)
+ * in a specific user ns.
  * @t: The task in question
+ * @ns: target user namespace
  * @cap: The capability to be tested for
  *
  * Return true if the specified task has the given superior capability
- * currently in effect to init_user_ns, false if not.  Don't write an
- * audit message for the check.
+ * currently in effect to the specified user namespace, false if not.
+ * Do not write an audit message for the check.
  *
  * Note that this does not set PF_SUPERPRIV on the task.
  */
-bool has_capability_noaudit(struct task_struct *t, int cap)
+bool has_ns_capability_noaudit(struct task_struct *t,
+			       struct user_namespace *ns, int cap)
 {
 	int ret;
 
 	rcu_read_lock();
-	ret = security_capable_noaudit(__task_cred(t), &init_user_ns, cap);
+	ret = security_capable_noaudit(__task_cred(t), ns, cap);
 	rcu_read_unlock();
 
 	return (ret == 0);
 }
 
+/**
+ * has_capability_noaudit - Does a task have a capability (unaudited) in the
+ * initial user ns
+ * @t: The task in question
+ * @cap: The capability to be tested for
+ *
+ * Return true if the specified task has the given superior capability
+ * currently in effect to init_user_ns, false if not.  Don't write an
+ * audit message for the check.
+ *
+ * Note that this does not set PF_SUPERPRIV on the task.
+ */
+bool has_capability_noaudit(struct task_struct *t, int cap)
+{
+	return has_ns_capability_noaudit(t, &init_user_ns, cap);
+}
+
 /**
  * capable - Determine if the current task has a superior capability in effect
  * @cap: The capability to be tested for
-- 
cgit v1.2.3


From f1c84dae0ecc51aa35c81f19a0ebcd6c0921ddcb Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:15 -0500
Subject: capabilities: remove task_ns_* functions

task_ in the front of a function, in the security subsystem anyway, means
to me at least, that we are operating with that task as the subject of the
security decision.  In this case what it means is that we are using current as
the subject but we use the task to get the right namespace.  Who in the world
would ever realize that's what task_ns_capable means just by the name?  This
patch eliminates the task_ns functions entirely and uses the ns_capable
function with task_user_ns(task) instead.  This means we explicitly open code
the ns in question in the caller.  I think it makes it a LOT clearer in the
caller what is going on.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
---
 include/linux/capability.h |  1 -
 include/linux/cred.h       |  6 ++++--
 kernel/capability.c        | 14 --------------
 kernel/ptrace.c            |  4 ++--
 kernel/sched.c             |  2 +-
 5 files changed, 7 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index 63f59fa8769d..e3e8d9cb9b08 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -547,7 +547,6 @@ extern bool has_ns_capability_noaudit(struct task_struct *t,
 				      struct user_namespace *ns, int cap);
 extern bool capable(int cap);
 extern bool ns_capable(struct user_namespace *ns, int cap);
-extern bool task_ns_capable(struct task_struct *t, int cap);
 extern bool nsown_capable(int cap);
 
 /* audit system wants to get cap info from files as well */
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 40308969ed00..adadf71a7327 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -358,10 +358,12 @@ static inline void put_cred(const struct cred *_cred)
 #define current_security()	(current_cred_xxx(security))
 
 #ifdef CONFIG_USER_NS
-#define current_user_ns() (current_cred_xxx(user_ns))
+#define current_user_ns()	(current_cred_xxx(user_ns))
+#define task_user_ns(task)	(task_cred_xxx((task), user_ns))
 #else
 extern struct user_namespace init_user_ns;
-#define current_user_ns() (&init_user_ns)
+#define current_user_ns()	(&init_user_ns)
+#define task_user_ns(task)	(&init_user_ns)
 #endif
 
 
diff --git a/kernel/capability.c b/kernel/capability.c
index 47626446c39a..74fb3b603045 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -408,20 +408,6 @@ bool capable(int cap)
 }
 EXPORT_SYMBOL(capable);
 
-/**
- * task_ns_capable - Determine whether current task has a superior
- * capability targeted at a specific task's user namespace.
- * @t: The task whose user namespace is targeted.
- * @cap: The capability in question.
- *
- *  Return true if it does, false otherwise.
- */
-bool task_ns_capable(struct task_struct *t, int cap)
-{
-	return ns_capable(task_cred_xxx(t, user)->user_ns, cap);
-}
-EXPORT_SYMBOL(task_ns_capable);
-
 /**
  * nsown_capable - Check superior capability to one's own user_ns
  * @cap: The capability in question
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index a70d2a5d8c7b..210bbf045ee9 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -196,7 +196,7 @@ ok:
 	smp_rmb();
 	if (task->mm)
 		dumpable = get_dumpable(task->mm);
-	if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE))
+	if (!dumpable && !ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
 		return -EPERM;
 
 	return security_ptrace_access_check(task, mode);
@@ -266,7 +266,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 	task->ptrace = PT_PTRACED;
 	if (seize)
 		task->ptrace |= PT_SEIZED;
-	if (task_ns_capable(task, CAP_SYS_PTRACE))
+	if (ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
 		task->ptrace |= PT_PTRACE_CAP;
 
 	__ptrace_link(task, current);
diff --git a/kernel/sched.c b/kernel/sched.c
index b50b0f0c9aa9..5670028a9c16 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5409,7 +5409,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 		goto out_free_cpus_allowed;
 	}
 	retval = -EPERM;
-	if (!check_same_owner(p) && !task_ns_capable(p, CAP_SYS_NICE))
+	if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE))
 		goto out_unlock;
 
 	retval = security_task_setscheduler(p);
-- 
cgit v1.2.3


From 69f594a38967f4540ce7a29b3fd214e68a8330bd Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:15 -0500
Subject: ptrace: do not audit capability check when outputing /proc/pid/stat

Reading /proc/pid/stat of another process checks if one has ptrace permissions
on that process.  If one does have permissions it outputs some data about the
process which might have security and attack implications.  If the current
task does not have ptrace permissions the read still works, but those fields
are filled with innocuous (0) values.  Since this check and a subsequent denial
is not a violation of the security policy we should not audit such denials.

This can be quite useful for removing ptrace broadly across a system without
flooding the logs when ps is run or when something harmlessly walks /proc.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Serge E. Hallyn <serge.hallyn@canonical.com>
---
 fs/proc/array.c          |  2 +-
 include/linux/ptrace.h   |  5 +++--
 kernel/ptrace.c          | 12 ++++++++++--
 security/selinux/hooks.c |  2 +-
 4 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3a1dafd228d1..ddffd7a88b97 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -380,7 +380,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 	state = *get_task_state(task);
 	vsize = eip = esp = 0;
-	permitted = ptrace_may_access(task, PTRACE_MODE_READ);
+	permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT);
 	mm = get_task_mm(task);
 	if (mm) {
 		vsize = task_vsize(mm);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 800f113bea66..a27e56ca41a4 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -127,8 +127,9 @@ extern void __ptrace_link(struct task_struct *child,
 			  struct task_struct *new_parent);
 extern void __ptrace_unlink(struct task_struct *child);
 extern void exit_ptrace(struct task_struct *tracer);
-#define PTRACE_MODE_READ   1
-#define PTRACE_MODE_ATTACH 2
+#define PTRACE_MODE_READ	0x01
+#define PTRACE_MODE_ATTACH	0x02
+#define PTRACE_MODE_NOAUDIT	0x04
 /* Returns 0 on success, -errno on denial. */
 extern int __ptrace_may_access(struct task_struct *task, unsigned int mode);
 /* Returns true on success, false on denial. */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 210bbf045ee9..c890ac9a7962 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -161,6 +161,14 @@ int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 	return ret;
 }
 
+static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
+{
+	if (mode & PTRACE_MODE_NOAUDIT)
+		return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE);
+	else
+		return has_ns_capability(current, ns, CAP_SYS_PTRACE);
+}
+
 int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 {
 	const struct cred *cred = current_cred(), *tcred;
@@ -187,7 +195,7 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 	     cred->gid == tcred->sgid &&
 	     cred->gid == tcred->gid))
 		goto ok;
-	if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE))
+	if (ptrace_has_cap(tcred->user->user_ns, mode))
 		goto ok;
 	rcu_read_unlock();
 	return -EPERM;
@@ -196,7 +204,7 @@ ok:
 	smp_rmb();
 	if (task->mm)
 		dumpable = get_dumpable(task->mm);
-	if (!dumpable && !ns_capable(task_user_ns(task), CAP_SYS_PTRACE))
+	if (!dumpable  && !ptrace_has_cap(task_user_ns(task), mode))
 		return -EPERM;
 
 	return security_ptrace_access_check(task, mode);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index c9605c4a2e08..14f94cd29c80 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1809,7 +1809,7 @@ static int selinux_ptrace_access_check(struct task_struct *child,
 	if (rc)
 		return rc;
 
-	if (mode == PTRACE_MODE_READ) {
+	if (mode & PTRACE_MODE_READ) {
 		u32 sid = current_sid();
 		u32 csid = task_sid(child);
 		return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL);
-- 
cgit v1.2.3


From fd778461524849afd035679030ae8e8873c72b81 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:16 -0500
Subject: security: remove the security_netlink_recv hook as it is equivalent
 to capable()

Once upon a time netlink was not synchronous and we had to get the effective
capabilities from the skb that was being received.  Today we instead get
the capabilities from the current task.  This has rendered the entire
purpose of the hook moot as it is now functionally equivalent to the
capable() call.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 drivers/scsi/scsi_netlink.c     |  2 +-
 include/linux/security.h        | 14 --------------
 kernel/audit.c                  |  4 ++--
 net/core/rtnetlink.c            |  2 +-
 net/decnet/netfilter/dn_rtmsg.c |  2 +-
 net/ipv4/netfilter/ip_queue.c   |  2 +-
 net/ipv6/netfilter/ip6_queue.c  |  2 +-
 net/netfilter/nfnetlink.c       |  2 +-
 net/netlink/genetlink.c         |  2 +-
 net/xfrm/xfrm_user.c            |  2 +-
 security/capability.c           |  1 -
 security/commoncap.c            |  8 --------
 security/security.c             |  6 ------
 security/selinux/hooks.c        | 19 -------------------
 14 files changed, 10 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index 26a8a45584ef..feee1cc39ea0 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -111,7 +111,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb)
 			goto next_msg;
 		}
 
-		if (security_netlink_recv(skb, CAP_SYS_ADMIN)) {
+		if (!capable(CAP_SYS_ADMIN)) {
 			err = -EPERM;
 			goto next_msg;
 		}
diff --git a/include/linux/security.h b/include/linux/security.h
index e345a9313a60..ba2d531c123f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -95,7 +95,6 @@ struct xfrm_user_sec_ctx;
 struct seq_file;
 
 extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb);
-extern int cap_netlink_recv(struct sk_buff *skb, int cap);
 
 void reset_security_ops(void);
 
@@ -792,12 +791,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@skb contains the sk_buff structure for the netlink message.
  *	Return 0 if the information was successfully saved and message
  *	is allowed to be transmitted.
- * @netlink_recv:
- *	Check permission before processing the received netlink message in
- *	@skb.
- *	@skb contains the sk_buff structure for the netlink message.
- *	@cap indicates the capability required
- *	Return 0 if permission is granted.
  *
  * Security hooks for Unix domain networking.
  *
@@ -1556,7 +1549,6 @@ struct security_operations {
 			  struct sembuf *sops, unsigned nsops, int alter);
 
 	int (*netlink_send) (struct sock *sk, struct sk_buff *skb);
-	int (*netlink_recv) (struct sk_buff *skb, int cap);
 
 	void (*d_instantiate) (struct dentry *dentry, struct inode *inode);
 
@@ -1803,7 +1795,6 @@ void security_d_instantiate(struct dentry *dentry, struct inode *inode);
 int security_getprocattr(struct task_struct *p, char *name, char **value);
 int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size);
 int security_netlink_send(struct sock *sk, struct sk_buff *skb);
-int security_netlink_recv(struct sk_buff *skb, int cap);
 int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
 int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
 void security_release_secctx(char *secdata, u32 seclen);
@@ -2478,11 +2469,6 @@ static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb)
 	return cap_netlink_send(sk, skb);
 }
 
-static inline int security_netlink_recv(struct sk_buff *skb, int cap)
-{
-	return cap_netlink_recv(skb, cap);
-}
-
 static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
 {
 	return -EOPNOTSUPP;
diff --git a/kernel/audit.c b/kernel/audit.c
index 0a1355ca3d79..f3ba55fa0b70 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -601,13 +601,13 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 	case AUDIT_TTY_SET:
 	case AUDIT_TRIM:
 	case AUDIT_MAKE_EQUIV:
-		if (security_netlink_recv(skb, CAP_AUDIT_CONTROL))
+		if (!capable(CAP_AUDIT_CONTROL))
 			err = -EPERM;
 		break;
 	case AUDIT_USER:
 	case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
 	case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2:
-		if (security_netlink_recv(skb, CAP_AUDIT_WRITE))
+		if (!capable(CAP_AUDIT_WRITE))
 			err = -EPERM;
 		break;
 	default:  /* bad msg */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 99d9e953fe39..d3a628196716 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1931,7 +1931,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	sz_idx = type>>2;
 	kind = type&3;
 
-	if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (kind != 2 && !capable(CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 69975e0bcdea..1531135130db 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -108,7 +108,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
 	if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
 		return;
 
-	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN))
 		RCV_SKB_FAIL(-EPERM);
 
 	/* Eventually we might send routing messages too */
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index e59aabd0eae4..ffabb2674718 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -430,7 +430,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
 	if (type <= IPQM_BASE)
 		return;
 
-	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN))
 		RCV_SKB_FAIL(-EPERM);
 
 	spin_lock_bh(&queue_lock);
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index e63c3972a739..5e5ce778be7f 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -431,7 +431,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
 	if (type <= IPQM_BASE)
 		return;
 
-	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN))
 		RCV_SKB_FAIL(-EPERM);
 
 	spin_lock_bh(&queue_lock);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 1905976b5135..e6c2b8f32180 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -130,7 +130,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	const struct nfnetlink_subsystem *ss;
 	int type, err;
 
-	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
 	/* All the messages must at least contain nfgenmsg */
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 482fa571b4ee..05fedbf489a5 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -516,7 +516,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		return -EOPNOTSUPP;
 
 	if ((ops->flags & GENL_ADMIN_PERM) &&
-	    security_netlink_recv(skb, CAP_NET_ADMIN))
+	    !capable(CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 0256b8a0a7cf..71de86698efa 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2290,7 +2290,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	link = &xfrm_dispatch[type];
 
 	/* All operations require privileges, even GET */
-	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
 	if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
diff --git a/security/capability.c b/security/capability.c
index 2984ea4f776f..a2c064d10448 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -999,7 +999,6 @@ void __init security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, sem_semctl);
 	set_to_cap_if_null(ops, sem_semop);
 	set_to_cap_if_null(ops, netlink_send);
-	set_to_cap_if_null(ops, netlink_recv);
 	set_to_cap_if_null(ops, d_instantiate);
 	set_to_cap_if_null(ops, getprocattr);
 	set_to_cap_if_null(ops, setprocattr);
diff --git a/security/commoncap.c b/security/commoncap.c
index 89f02ff66af9..7817a763444d 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -56,14 +56,6 @@ int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
-int cap_netlink_recv(struct sk_buff *skb, int cap)
-{
-	if (!cap_raised(current_cap(), cap))
-		return -EPERM;
-	return 0;
-}
-EXPORT_SYMBOL(cap_netlink_recv);
-
 /**
  * cap_capable - Determine whether a task has a particular effective capability
  * @cred: The credentials to use
diff --git a/security/security.c b/security/security.c
index 8900c5c4db5c..85481a9c5632 100644
--- a/security/security.c
+++ b/security/security.c
@@ -922,12 +922,6 @@ int security_netlink_send(struct sock *sk, struct sk_buff *skb)
 	return security_ops->netlink_send(sk, skb);
 }
 
-int security_netlink_recv(struct sk_buff *skb, int cap)
-{
-	return security_ops->netlink_recv(skb, cap);
-}
-EXPORT_SYMBOL(security_netlink_recv);
-
 int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
 {
 	return security_ops->secid_to_secctx(secid, secdata, seclen);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 14f94cd29c80..3e37d25a9bbe 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4713,24 +4713,6 @@ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb)
 	return selinux_nlmsg_perm(sk, skb);
 }
 
-static int selinux_netlink_recv(struct sk_buff *skb, int capability)
-{
-	int err;
-	struct common_audit_data ad;
-	u32 sid;
-
-	err = cap_netlink_recv(skb, capability);
-	if (err)
-		return err;
-
-	COMMON_AUDIT_DATA_INIT(&ad, CAP);
-	ad.u.cap = capability;
-
-	security_task_getsecid(current, &sid);
-	return avc_has_perm(sid, sid, SECCLASS_CAPABILITY,
-			    CAP_TO_MASK(capability), &ad);
-}
-
 static int ipc_alloc_security(struct task_struct *task,
 			      struct kern_ipc_perm *perm,
 			      u16 sclass)
@@ -5459,7 +5441,6 @@ static struct security_operations selinux_ops = {
 	.vm_enough_memory =		selinux_vm_enough_memory,
 
 	.netlink_send =			selinux_netlink_send,
-	.netlink_recv =			selinux_netlink_recv,
 
 	.bprm_set_creds =		selinux_bprm_set_creds,
 	.bprm_committing_creds =	selinux_bprm_committing_creds,
-- 
cgit v1.2.3


From f423e5ba76e7e4a6fcb4836b4f072d1fdebba8b5 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 12:25:16 -0500
Subject: capabilities: remove __cap_full_set definition

In 5163b583a036b103c3cec7171d6731c125773ed6 I removed __cap_full_set but
forgot to remove it from a header.  Do that.

Reported-by: Kornilios Kourtis <kkourt@cslab.ece.ntua.gr>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/capability.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index e3e8d9cb9b08..d527b2880331 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -379,7 +379,6 @@ struct user_namespace;
 struct user_namespace *current_user_ns(void);
 
 extern const kernel_cap_t __cap_empty_set;
-extern const kernel_cap_t __cap_full_set;
 extern const kernel_cap_t __cap_init_eff_set;
 
 /*
-- 
cgit v1.2.3


From c78f2b64963654419a8cd3b7e264251860e9f9eb Mon Sep 17 00:00:00 2001
From: Rhyland Klein <rklein@nvidia.com>
Date: Mon, 5 Dec 2011 17:50:45 -0800
Subject: bq20z75: Rename to sbs-battery

This driver for the bq20z75 implemented the register spec defined
by the SBS standard. As this is not unique to the TI part this
was originally written for, we can generalize this driver to
show its support for any SBS compliant battery.

Signed-off-by: Rhyland Klein <rklein@nvidia.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/Kconfig             |   8 +-
 drivers/power/Makefile            |   2 +-
 drivers/power/bq20z75.c           | 871 --------------------------------------
 drivers/power/sbs-battery.c       | 871 ++++++++++++++++++++++++++++++++++++++
 include/linux/power/bq20z75.h     |  42 --
 include/linux/power/sbs-battery.h |  42 ++
 6 files changed, 918 insertions(+), 918 deletions(-)
 delete mode 100644 drivers/power/bq20z75.c
 create mode 100644 drivers/power/sbs-battery.c
 delete mode 100644 include/linux/power/bq20z75.h
 create mode 100644 include/linux/power/sbs-battery.h

(limited to 'include/linux')

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 3bd2ed86fea2..e24485f35384 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -116,12 +116,12 @@ config BATTERY_WM97XX
 	help
 	  Say Y to enable support for battery measured by WM97xx aux port.
 
-config BATTERY_BQ20Z75
-        tristate "TI BQ20z75 gas gauge"
+config BATTERY_SBS
+        tristate "SBS Compliant gas gauge"
         depends on I2C
         help
-         Say Y to include support for TI BQ20z75 SBS-compliant
-         gas gauge and protection IC.
+	  Say Y to include support for SBS battery driver for SBS-compliant
+	  gas gauges.
 
 config BATTERY_BQ27x00
 	tristate "BQ27x00 battery driver"
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index 9a78b1dd570b..9c3bbf76a2bf 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -22,7 +22,7 @@ obj-$(CONFIG_BATTERY_OLPC)	+= olpc_battery.o
 obj-$(CONFIG_BATTERY_TOSA)	+= tosa_battery.o
 obj-$(CONFIG_BATTERY_COLLIE)	+= collie_battery.o
 obj-$(CONFIG_BATTERY_WM97XX)	+= wm97xx_battery.o
-obj-$(CONFIG_BATTERY_BQ20Z75)	+= bq20z75.o
+obj-$(CONFIG_BATTERY_SBS)	+= sbs-battery.o
 obj-$(CONFIG_BATTERY_BQ27x00)	+= bq27x00_battery.o
 obj-$(CONFIG_BATTERY_DA9030)	+= da9030_battery.o
 obj-$(CONFIG_BATTERY_MAX17040)	+= max17040_battery.o
diff --git a/drivers/power/bq20z75.c b/drivers/power/bq20z75.c
deleted file mode 100644
index ce95ff791016..000000000000
--- a/drivers/power/bq20z75.c
+++ /dev/null
@@ -1,871 +0,0 @@
-/*
- * Gas Gauge driver for TI's BQ20Z75
- *
- * Copyright (c) 2010, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/power_supply.h>
-#include <linux/i2c.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/gpio.h>
-
-#include <linux/power/bq20z75.h>
-
-enum {
-	REG_MANUFACTURER_DATA,
-	REG_TEMPERATURE,
-	REG_VOLTAGE,
-	REG_CURRENT,
-	REG_CAPACITY,
-	REG_TIME_TO_EMPTY,
-	REG_TIME_TO_FULL,
-	REG_STATUS,
-	REG_CYCLE_COUNT,
-	REG_SERIAL_NUMBER,
-	REG_REMAINING_CAPACITY,
-	REG_REMAINING_CAPACITY_CHARGE,
-	REG_FULL_CHARGE_CAPACITY,
-	REG_FULL_CHARGE_CAPACITY_CHARGE,
-	REG_DESIGN_CAPACITY,
-	REG_DESIGN_CAPACITY_CHARGE,
-	REG_DESIGN_VOLTAGE,
-};
-
-/* Battery Mode defines */
-#define BATTERY_MODE_OFFSET		0x03
-#define BATTERY_MODE_MASK		0x8000
-enum bq20z75_battery_mode {
-	BATTERY_MODE_AMPS,
-	BATTERY_MODE_WATTS
-};
-
-/* manufacturer access defines */
-#define MANUFACTURER_ACCESS_STATUS	0x0006
-#define MANUFACTURER_ACCESS_SLEEP	0x0011
-
-/* battery status value bits */
-#define BATTERY_DISCHARGING		0x40
-#define BATTERY_FULL_CHARGED		0x20
-#define BATTERY_FULL_DISCHARGED		0x10
-
-#define BQ20Z75_DATA(_psp, _addr, _min_value, _max_value) { \
-	.psp = _psp, \
-	.addr = _addr, \
-	.min_value = _min_value, \
-	.max_value = _max_value, \
-}
-
-static const struct bq20z75_device_data {
-	enum power_supply_property psp;
-	u8 addr;
-	int min_value;
-	int max_value;
-} bq20z75_data[] = {
-	[REG_MANUFACTURER_DATA] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_PRESENT, 0x00, 0, 65535),
-	[REG_TEMPERATURE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_TEMP, 0x08, 0, 65535),
-	[REG_VOLTAGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_VOLTAGE_NOW, 0x09, 0, 20000),
-	[REG_CURRENT] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CURRENT_NOW, 0x0A, -32768,
-			32767),
-	[REG_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CAPACITY, 0x0E, 0, 100),
-	[REG_REMAINING_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_NOW, 0x0F, 0, 65535),
-	[REG_REMAINING_CAPACITY_CHARGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_NOW, 0x0F, 0, 65535),
-	[REG_FULL_CHARGE_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_FULL, 0x10, 0, 65535),
-	[REG_FULL_CHARGE_CAPACITY_CHARGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_FULL, 0x10, 0, 65535),
-	[REG_TIME_TO_EMPTY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG, 0x12, 0,
-			65535),
-	[REG_TIME_TO_FULL] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_TIME_TO_FULL_AVG, 0x13, 0,
-			65535),
-	[REG_STATUS] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_STATUS, 0x16, 0, 65535),
-	[REG_CYCLE_COUNT] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CYCLE_COUNT, 0x17, 0, 65535),
-	[REG_DESIGN_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, 0x18, 0,
-			65535),
-	[REG_DESIGN_CAPACITY_CHARGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, 0x18, 0,
-			65535),
-	[REG_DESIGN_VOLTAGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, 0x19, 0,
-			65535),
-	[REG_SERIAL_NUMBER] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_SERIAL_NUMBER, 0x1C, 0, 65535),
-};
-
-static enum power_supply_property bq20z75_properties[] = {
-	POWER_SUPPLY_PROP_STATUS,
-	POWER_SUPPLY_PROP_HEALTH,
-	POWER_SUPPLY_PROP_PRESENT,
-	POWER_SUPPLY_PROP_TECHNOLOGY,
-	POWER_SUPPLY_PROP_CYCLE_COUNT,
-	POWER_SUPPLY_PROP_VOLTAGE_NOW,
-	POWER_SUPPLY_PROP_CURRENT_NOW,
-	POWER_SUPPLY_PROP_CAPACITY,
-	POWER_SUPPLY_PROP_TEMP,
-	POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG,
-	POWER_SUPPLY_PROP_TIME_TO_FULL_AVG,
-	POWER_SUPPLY_PROP_SERIAL_NUMBER,
-	POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
-	POWER_SUPPLY_PROP_ENERGY_NOW,
-	POWER_SUPPLY_PROP_ENERGY_FULL,
-	POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN,
-	POWER_SUPPLY_PROP_CHARGE_NOW,
-	POWER_SUPPLY_PROP_CHARGE_FULL,
-	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
-};
-
-struct bq20z75_info {
-	struct i2c_client		*client;
-	struct power_supply		power_supply;
-	struct bq20z75_platform_data	*pdata;
-	bool				is_present;
-	bool				gpio_detect;
-	bool				enable_detection;
-	int				irq;
-	int				last_state;
-	int				poll_time;
-	struct delayed_work		work;
-	int				ignore_changes;
-};
-
-static int bq20z75_read_word_data(struct i2c_client *client, u8 address)
-{
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
-	s32 ret = 0;
-	int retries = 1;
-
-	if (bq20z75_device->pdata)
-		retries = max(bq20z75_device->pdata->i2c_retry_count + 1, 1);
-
-	while (retries > 0) {
-		ret = i2c_smbus_read_word_data(client, address);
-		if (ret >= 0)
-			break;
-		retries--;
-	}
-
-	if (ret < 0) {
-		dev_dbg(&client->dev,
-			"%s: i2c read at address 0x%x failed\n",
-			__func__, address);
-		return ret;
-	}
-
-	return le16_to_cpu(ret);
-}
-
-static int bq20z75_write_word_data(struct i2c_client *client, u8 address,
-	u16 value)
-{
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
-	s32 ret = 0;
-	int retries = 1;
-
-	if (bq20z75_device->pdata)
-		retries = max(bq20z75_device->pdata->i2c_retry_count + 1, 1);
-
-	while (retries > 0) {
-		ret = i2c_smbus_write_word_data(client, address,
-			le16_to_cpu(value));
-		if (ret >= 0)
-			break;
-		retries--;
-	}
-
-	if (ret < 0) {
-		dev_dbg(&client->dev,
-			"%s: i2c write to address 0x%x failed\n",
-			__func__, address);
-		return ret;
-	}
-
-	return 0;
-}
-
-static int bq20z75_get_battery_presence_and_health(
-	struct i2c_client *client, enum power_supply_property psp,
-	union power_supply_propval *val)
-{
-	s32 ret;
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
-
-	if (psp == POWER_SUPPLY_PROP_PRESENT &&
-		bq20z75_device->gpio_detect) {
-		ret = gpio_get_value(
-			bq20z75_device->pdata->battery_detect);
-		if (ret == bq20z75_device->pdata->battery_detect_present)
-			val->intval = 1;
-		else
-			val->intval = 0;
-		bq20z75_device->is_present = val->intval;
-		return ret;
-	}
-
-	/* Write to ManufacturerAccess with
-	 * ManufacturerAccess command and then
-	 * read the status */
-	ret = bq20z75_write_word_data(client,
-		bq20z75_data[REG_MANUFACTURER_DATA].addr,
-		MANUFACTURER_ACCESS_STATUS);
-	if (ret < 0) {
-		if (psp == POWER_SUPPLY_PROP_PRESENT)
-			val->intval = 0; /* battery removed */
-		return ret;
-	}
-
-	ret = bq20z75_read_word_data(client,
-		bq20z75_data[REG_MANUFACTURER_DATA].addr);
-	if (ret < 0)
-		return ret;
-
-	if (ret < bq20z75_data[REG_MANUFACTURER_DATA].min_value ||
-	    ret > bq20z75_data[REG_MANUFACTURER_DATA].max_value) {
-		val->intval = 0;
-		return 0;
-	}
-
-	/* Mask the upper nibble of 2nd byte and
-	 * lower byte of response then
-	 * shift the result by 8 to get status*/
-	ret &= 0x0F00;
-	ret >>= 8;
-	if (psp == POWER_SUPPLY_PROP_PRESENT) {
-		if (ret == 0x0F)
-			/* battery removed */
-			val->intval = 0;
-		else
-			val->intval = 1;
-	} else if (psp == POWER_SUPPLY_PROP_HEALTH) {
-		if (ret == 0x09)
-			val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
-		else if (ret == 0x0B)
-			val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
-		else if (ret == 0x0C)
-			val->intval = POWER_SUPPLY_HEALTH_DEAD;
-		else
-			val->intval = POWER_SUPPLY_HEALTH_GOOD;
-	}
-
-	return 0;
-}
-
-static int bq20z75_get_battery_property(struct i2c_client *client,
-	int reg_offset, enum power_supply_property psp,
-	union power_supply_propval *val)
-{
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
-	s32 ret;
-
-	ret = bq20z75_read_word_data(client,
-		bq20z75_data[reg_offset].addr);
-	if (ret < 0)
-		return ret;
-
-	/* returned values are 16 bit */
-	if (bq20z75_data[reg_offset].min_value < 0)
-		ret = (s16)ret;
-
-	if (ret >= bq20z75_data[reg_offset].min_value &&
-	    ret <= bq20z75_data[reg_offset].max_value) {
-		val->intval = ret;
-		if (psp != POWER_SUPPLY_PROP_STATUS)
-			return 0;
-
-		if (ret & BATTERY_FULL_CHARGED)
-			val->intval = POWER_SUPPLY_STATUS_FULL;
-		else if (ret & BATTERY_FULL_DISCHARGED)
-			val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
-		else if (ret & BATTERY_DISCHARGING)
-			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
-		else
-			val->intval = POWER_SUPPLY_STATUS_CHARGING;
-
-		if (bq20z75_device->poll_time == 0)
-			bq20z75_device->last_state = val->intval;
-		else if (bq20z75_device->last_state != val->intval) {
-			cancel_delayed_work_sync(&bq20z75_device->work);
-			power_supply_changed(&bq20z75_device->power_supply);
-			bq20z75_device->poll_time = 0;
-		}
-	} else {
-		if (psp == POWER_SUPPLY_PROP_STATUS)
-			val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
-		else
-			val->intval = 0;
-	}
-
-	return 0;
-}
-
-static void  bq20z75_unit_adjustment(struct i2c_client *client,
-	enum power_supply_property psp, union power_supply_propval *val)
-{
-#define BASE_UNIT_CONVERSION		1000
-#define BATTERY_MODE_CAP_MULT_WATT	(10 * BASE_UNIT_CONVERSION)
-#define TIME_UNIT_CONVERSION		60
-#define TEMP_KELVIN_TO_CELSIUS		2731
-	switch (psp) {
-	case POWER_SUPPLY_PROP_ENERGY_NOW:
-	case POWER_SUPPLY_PROP_ENERGY_FULL:
-	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
-		/* bq20z75 provides energy in units of 10mWh.
-		 * Convert to µWh
-		 */
-		val->intval *= BATTERY_MODE_CAP_MULT_WATT;
-		break;
-
-	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
-	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
-	case POWER_SUPPLY_PROP_CURRENT_NOW:
-	case POWER_SUPPLY_PROP_CHARGE_NOW:
-	case POWER_SUPPLY_PROP_CHARGE_FULL:
-	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
-		val->intval *= BASE_UNIT_CONVERSION;
-		break;
-
-	case POWER_SUPPLY_PROP_TEMP:
-		/* bq20z75 provides battery temperature in 0.1K
-		 * so convert it to 0.1°C
-		 */
-		val->intval -= TEMP_KELVIN_TO_CELSIUS;
-		break;
-
-	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
-	case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
-		/* bq20z75 provides time to empty and time to full in minutes.
-		 * Convert to seconds
-		 */
-		val->intval *= TIME_UNIT_CONVERSION;
-		break;
-
-	default:
-		dev_dbg(&client->dev,
-			"%s: no need for unit conversion %d\n", __func__, psp);
-	}
-}
-
-static enum bq20z75_battery_mode
-bq20z75_set_battery_mode(struct i2c_client *client,
-	enum bq20z75_battery_mode mode)
-{
-	int ret, original_val;
-
-	original_val = bq20z75_read_word_data(client, BATTERY_MODE_OFFSET);
-	if (original_val < 0)
-		return original_val;
-
-	if ((original_val & BATTERY_MODE_MASK) == mode)
-		return mode;
-
-	if (mode == BATTERY_MODE_AMPS)
-		ret = original_val & ~BATTERY_MODE_MASK;
-	else
-		ret = original_val | BATTERY_MODE_MASK;
-
-	ret = bq20z75_write_word_data(client, BATTERY_MODE_OFFSET, ret);
-	if (ret < 0)
-		return ret;
-
-	return original_val & BATTERY_MODE_MASK;
-}
-
-static int bq20z75_get_battery_capacity(struct i2c_client *client,
-	int reg_offset, enum power_supply_property psp,
-	union power_supply_propval *val)
-{
-	s32 ret;
-	enum bq20z75_battery_mode mode = BATTERY_MODE_WATTS;
-
-	if (power_supply_is_amp_property(psp))
-		mode = BATTERY_MODE_AMPS;
-
-	mode = bq20z75_set_battery_mode(client, mode);
-	if (mode < 0)
-		return mode;
-
-	ret = bq20z75_read_word_data(client, bq20z75_data[reg_offset].addr);
-	if (ret < 0)
-		return ret;
-
-	if (psp == POWER_SUPPLY_PROP_CAPACITY) {
-		/* bq20z75 spec says that this can be >100 %
-		* even if max value is 100 % */
-		val->intval = min(ret, 100);
-	} else
-		val->intval = ret;
-
-	ret = bq20z75_set_battery_mode(client, mode);
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-
-static char bq20z75_serial[5];
-static int bq20z75_get_battery_serial_number(struct i2c_client *client,
-	union power_supply_propval *val)
-{
-	int ret;
-
-	ret = bq20z75_read_word_data(client,
-		bq20z75_data[REG_SERIAL_NUMBER].addr);
-	if (ret < 0)
-		return ret;
-
-	ret = sprintf(bq20z75_serial, "%04x", ret);
-	val->strval = bq20z75_serial;
-
-	return 0;
-}
-
-static int bq20z75_get_property_index(struct i2c_client *client,
-	enum power_supply_property psp)
-{
-	int count;
-	for (count = 0; count < ARRAY_SIZE(bq20z75_data); count++)
-		if (psp == bq20z75_data[count].psp)
-			return count;
-
-	dev_warn(&client->dev,
-		"%s: Invalid Property - %d\n", __func__, psp);
-
-	return -EINVAL;
-}
-
-static int bq20z75_get_property(struct power_supply *psy,
-	enum power_supply_property psp,
-	union power_supply_propval *val)
-{
-	int ret = 0;
-	struct bq20z75_info *bq20z75_device = container_of(psy,
-				struct bq20z75_info, power_supply);
-	struct i2c_client *client = bq20z75_device->client;
-
-	switch (psp) {
-	case POWER_SUPPLY_PROP_PRESENT:
-	case POWER_SUPPLY_PROP_HEALTH:
-		ret = bq20z75_get_battery_presence_and_health(client, psp, val);
-		if (psp == POWER_SUPPLY_PROP_PRESENT)
-			return 0;
-		break;
-
-	case POWER_SUPPLY_PROP_TECHNOLOGY:
-		val->intval = POWER_SUPPLY_TECHNOLOGY_LION;
-		break;
-
-	case POWER_SUPPLY_PROP_ENERGY_NOW:
-	case POWER_SUPPLY_PROP_ENERGY_FULL:
-	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
-	case POWER_SUPPLY_PROP_CHARGE_NOW:
-	case POWER_SUPPLY_PROP_CHARGE_FULL:
-	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
-	case POWER_SUPPLY_PROP_CAPACITY:
-		ret = bq20z75_get_property_index(client, psp);
-		if (ret < 0)
-			break;
-
-		ret = bq20z75_get_battery_capacity(client, ret, psp, val);
-		break;
-
-	case POWER_SUPPLY_PROP_SERIAL_NUMBER:
-		ret = bq20z75_get_battery_serial_number(client, val);
-		break;
-
-	case POWER_SUPPLY_PROP_STATUS:
-	case POWER_SUPPLY_PROP_CYCLE_COUNT:
-	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
-	case POWER_SUPPLY_PROP_CURRENT_NOW:
-	case POWER_SUPPLY_PROP_TEMP:
-	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
-	case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
-	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
-		ret = bq20z75_get_property_index(client, psp);
-		if (ret < 0)
-			break;
-
-		ret = bq20z75_get_battery_property(client, ret, psp, val);
-		break;
-
-	default:
-		dev_err(&client->dev,
-			"%s: INVALID property\n", __func__);
-		return -EINVAL;
-	}
-
-	if (!bq20z75_device->enable_detection)
-		goto done;
-
-	if (!bq20z75_device->gpio_detect &&
-		bq20z75_device->is_present != (ret >= 0)) {
-		bq20z75_device->is_present = (ret >= 0);
-		power_supply_changed(&bq20z75_device->power_supply);
-	}
-
-done:
-	if (!ret) {
-		/* Convert units to match requirements for power supply class */
-		bq20z75_unit_adjustment(client, psp, val);
-	}
-
-	dev_dbg(&client->dev,
-		"%s: property = %d, value = %x\n", __func__, psp, val->intval);
-
-	if (ret && bq20z75_device->is_present)
-		return ret;
-
-	/* battery not present, so return NODATA for properties */
-	if (ret)
-		return -ENODATA;
-
-	return 0;
-}
-
-static irqreturn_t bq20z75_irq(int irq, void *devid)
-{
-	struct power_supply *battery = devid;
-
-	power_supply_changed(battery);
-
-	return IRQ_HANDLED;
-}
-
-static void bq20z75_external_power_changed(struct power_supply *psy)
-{
-	struct bq20z75_info *bq20z75_device;
-
-	bq20z75_device = container_of(psy, struct bq20z75_info, power_supply);
-
-	if (bq20z75_device->ignore_changes > 0) {
-		bq20z75_device->ignore_changes--;
-		return;
-	}
-
-	/* cancel outstanding work */
-	cancel_delayed_work_sync(&bq20z75_device->work);
-
-	schedule_delayed_work(&bq20z75_device->work, HZ);
-	bq20z75_device->poll_time = bq20z75_device->pdata->poll_retry_count;
-}
-
-static void bq20z75_delayed_work(struct work_struct *work)
-{
-	struct bq20z75_info *bq20z75_device;
-	s32 ret;
-
-	bq20z75_device = container_of(work, struct bq20z75_info, work.work);
-
-	ret = bq20z75_read_word_data(bq20z75_device->client,
-				     bq20z75_data[REG_STATUS].addr);
-	/* if the read failed, give up on this work */
-	if (ret < 0) {
-		bq20z75_device->poll_time = 0;
-		return;
-	}
-
-	if (ret & BATTERY_FULL_CHARGED)
-		ret = POWER_SUPPLY_STATUS_FULL;
-	else if (ret & BATTERY_FULL_DISCHARGED)
-		ret = POWER_SUPPLY_STATUS_NOT_CHARGING;
-	else if (ret & BATTERY_DISCHARGING)
-		ret = POWER_SUPPLY_STATUS_DISCHARGING;
-	else
-		ret = POWER_SUPPLY_STATUS_CHARGING;
-
-	if (bq20z75_device->last_state != ret) {
-		bq20z75_device->poll_time = 0;
-		power_supply_changed(&bq20z75_device->power_supply);
-		return;
-	}
-	if (bq20z75_device->poll_time > 0) {
-		schedule_delayed_work(&bq20z75_device->work, HZ);
-		bq20z75_device->poll_time--;
-		return;
-	}
-}
-
-#if defined(CONFIG_OF)
-
-#include <linux/of_device.h>
-#include <linux/of_gpio.h>
-
-static const struct of_device_id bq20z75_dt_ids[] = {
-	{ .compatible = "ti,bq20z75" },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, bq20z75_dt_ids);
-
-static struct bq20z75_platform_data *bq20z75_of_populate_pdata(
-		struct i2c_client *client)
-{
-	struct device_node *of_node = client->dev.of_node;
-	struct bq20z75_platform_data *pdata = client->dev.platform_data;
-	enum of_gpio_flags gpio_flags;
-	int rc;
-	u32 prop;
-
-	/* verify this driver matches this device */
-	if (!of_node)
-		return NULL;
-
-	/* if platform data is set, honor it */
-	if (pdata)
-		return pdata;
-
-	/* first make sure at least one property is set, otherwise
-	 * it won't change behavior from running without pdata.
-	 */
-	if (!of_get_property(of_node, "ti,i2c-retry-count", NULL) &&
-		!of_get_property(of_node, "ti,poll-retry-count", NULL) &&
-		!of_get_property(of_node, "ti,battery-detect-gpios", NULL))
-		goto of_out;
-
-	pdata = devm_kzalloc(&client->dev, sizeof(struct bq20z75_platform_data),
-				GFP_KERNEL);
-	if (!pdata)
-		goto of_out;
-
-	rc = of_property_read_u32(of_node, "ti,i2c-retry-count", &prop);
-	if (!rc)
-		pdata->i2c_retry_count = prop;
-
-	rc = of_property_read_u32(of_node, "ti,poll-retry-count", &prop);
-	if (!rc)
-		pdata->poll_retry_count = prop;
-
-	if (!of_get_property(of_node, "ti,battery-detect-gpios", NULL)) {
-		pdata->battery_detect = -1;
-		goto of_out;
-	}
-
-	pdata->battery_detect = of_get_named_gpio_flags(of_node,
-			"ti,battery-detect-gpios", 0, &gpio_flags);
-
-	if (gpio_flags & OF_GPIO_ACTIVE_LOW)
-		pdata->battery_detect_present = 0;
-	else
-		pdata->battery_detect_present = 1;
-
-of_out:
-	return pdata;
-}
-#else
-#define bq20z75_dt_ids NULL
-static struct bq20z75_platform_data *bq20z75_of_populate_pdata(
-	struct i2c_client *client)
-{
-	return client->dev.platform_data;
-}
-#endif
-
-static int __devinit bq20z75_probe(struct i2c_client *client,
-	const struct i2c_device_id *id)
-{
-	struct bq20z75_info *bq20z75_device;
-	struct bq20z75_platform_data *pdata = client->dev.platform_data;
-	int rc;
-	int irq;
-
-	bq20z75_device = kzalloc(sizeof(struct bq20z75_info), GFP_KERNEL);
-	if (!bq20z75_device)
-		return -ENOMEM;
-
-	bq20z75_device->client = client;
-	bq20z75_device->enable_detection = false;
-	bq20z75_device->gpio_detect = false;
-	bq20z75_device->power_supply.name = "battery";
-	bq20z75_device->power_supply.type = POWER_SUPPLY_TYPE_BATTERY;
-	bq20z75_device->power_supply.properties = bq20z75_properties;
-	bq20z75_device->power_supply.num_properties =
-		ARRAY_SIZE(bq20z75_properties);
-	bq20z75_device->power_supply.get_property = bq20z75_get_property;
-	/* ignore first notification of external change, it is generated
-	 * from the power_supply_register call back
-	 */
-	bq20z75_device->ignore_changes = 1;
-	bq20z75_device->last_state = POWER_SUPPLY_STATUS_UNKNOWN;
-	bq20z75_device->power_supply.external_power_changed =
-		bq20z75_external_power_changed;
-
-	pdata = bq20z75_of_populate_pdata(client);
-
-	if (pdata) {
-		bq20z75_device->gpio_detect =
-			gpio_is_valid(pdata->battery_detect);
-		bq20z75_device->pdata = pdata;
-	}
-
-	i2c_set_clientdata(client, bq20z75_device);
-
-	if (!bq20z75_device->gpio_detect)
-		goto skip_gpio;
-
-	rc = gpio_request(pdata->battery_detect, dev_name(&client->dev));
-	if (rc) {
-		dev_warn(&client->dev, "Failed to request gpio: %d\n", rc);
-		bq20z75_device->gpio_detect = false;
-		goto skip_gpio;
-	}
-
-	rc = gpio_direction_input(pdata->battery_detect);
-	if (rc) {
-		dev_warn(&client->dev, "Failed to get gpio as input: %d\n", rc);
-		gpio_free(pdata->battery_detect);
-		bq20z75_device->gpio_detect = false;
-		goto skip_gpio;
-	}
-
-	irq = gpio_to_irq(pdata->battery_detect);
-	if (irq <= 0) {
-		dev_warn(&client->dev, "Failed to get gpio as irq: %d\n", irq);
-		gpio_free(pdata->battery_detect);
-		bq20z75_device->gpio_detect = false;
-		goto skip_gpio;
-	}
-
-	rc = request_irq(irq, bq20z75_irq,
-		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-		dev_name(&client->dev), &bq20z75_device->power_supply);
-	if (rc) {
-		dev_warn(&client->dev, "Failed to request irq: %d\n", rc);
-		gpio_free(pdata->battery_detect);
-		bq20z75_device->gpio_detect = false;
-		goto skip_gpio;
-	}
-
-	bq20z75_device->irq = irq;
-
-skip_gpio:
-
-	rc = power_supply_register(&client->dev, &bq20z75_device->power_supply);
-	if (rc) {
-		dev_err(&client->dev,
-			"%s: Failed to register power supply\n", __func__);
-		goto exit_psupply;
-	}
-
-	dev_info(&client->dev,
-		"%s: battery gas gauge device registered\n", client->name);
-
-	INIT_DELAYED_WORK(&bq20z75_device->work, bq20z75_delayed_work);
-
-	bq20z75_device->enable_detection = true;
-
-	return 0;
-
-exit_psupply:
-	if (bq20z75_device->irq)
-		free_irq(bq20z75_device->irq, &bq20z75_device->power_supply);
-	if (bq20z75_device->gpio_detect)
-		gpio_free(pdata->battery_detect);
-
-	kfree(bq20z75_device);
-
-	return rc;
-}
-
-static int __devexit bq20z75_remove(struct i2c_client *client)
-{
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
-
-	if (bq20z75_device->irq)
-		free_irq(bq20z75_device->irq, &bq20z75_device->power_supply);
-	if (bq20z75_device->gpio_detect)
-		gpio_free(bq20z75_device->pdata->battery_detect);
-
-	power_supply_unregister(&bq20z75_device->power_supply);
-
-	cancel_delayed_work_sync(&bq20z75_device->work);
-
-	kfree(bq20z75_device);
-	bq20z75_device = NULL;
-
-	return 0;
-}
-
-#if defined CONFIG_PM
-static int bq20z75_suspend(struct i2c_client *client,
-	pm_message_t state)
-{
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
-	s32 ret;
-
-	if (bq20z75_device->poll_time > 0)
-		cancel_delayed_work_sync(&bq20z75_device->work);
-
-	/* write to manufacturer access with sleep command */
-	ret = bq20z75_write_word_data(client,
-		bq20z75_data[REG_MANUFACTURER_DATA].addr,
-		MANUFACTURER_ACCESS_SLEEP);
-	if (bq20z75_device->is_present && ret < 0)
-		return ret;
-
-	return 0;
-}
-#else
-#define bq20z75_suspend		NULL
-#endif
-/* any smbus transaction will wake up bq20z75 */
-#define bq20z75_resume		NULL
-
-static const struct i2c_device_id bq20z75_id[] = {
-	{ "bq20z75", 0 },
-	{}
-};
-MODULE_DEVICE_TABLE(i2c, bq20z75_id);
-
-static struct i2c_driver bq20z75_battery_driver = {
-	.probe		= bq20z75_probe,
-	.remove		= __devexit_p(bq20z75_remove),
-	.suspend	= bq20z75_suspend,
-	.resume		= bq20z75_resume,
-	.id_table	= bq20z75_id,
-	.driver = {
-		.name	= "bq20z75-battery",
-		.of_match_table = bq20z75_dt_ids,
-	},
-};
-
-static int __init bq20z75_battery_init(void)
-{
-	return i2c_add_driver(&bq20z75_battery_driver);
-}
-module_init(bq20z75_battery_init);
-
-static void __exit bq20z75_battery_exit(void)
-{
-	i2c_del_driver(&bq20z75_battery_driver);
-}
-module_exit(bq20z75_battery_exit);
-
-MODULE_DESCRIPTION("BQ20z75 battery monitor driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/power/sbs-battery.c b/drivers/power/sbs-battery.c
new file mode 100644
index 000000000000..ce95ff791016
--- /dev/null
+++ b/drivers/power/sbs-battery.c
@@ -0,0 +1,871 @@
+/*
+ * Gas Gauge driver for TI's BQ20Z75
+ *
+ * Copyright (c) 2010, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/power_supply.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+
+#include <linux/power/bq20z75.h>
+
+enum {
+	REG_MANUFACTURER_DATA,
+	REG_TEMPERATURE,
+	REG_VOLTAGE,
+	REG_CURRENT,
+	REG_CAPACITY,
+	REG_TIME_TO_EMPTY,
+	REG_TIME_TO_FULL,
+	REG_STATUS,
+	REG_CYCLE_COUNT,
+	REG_SERIAL_NUMBER,
+	REG_REMAINING_CAPACITY,
+	REG_REMAINING_CAPACITY_CHARGE,
+	REG_FULL_CHARGE_CAPACITY,
+	REG_FULL_CHARGE_CAPACITY_CHARGE,
+	REG_DESIGN_CAPACITY,
+	REG_DESIGN_CAPACITY_CHARGE,
+	REG_DESIGN_VOLTAGE,
+};
+
+/* Battery Mode defines */
+#define BATTERY_MODE_OFFSET		0x03
+#define BATTERY_MODE_MASK		0x8000
+enum bq20z75_battery_mode {
+	BATTERY_MODE_AMPS,
+	BATTERY_MODE_WATTS
+};
+
+/* manufacturer access defines */
+#define MANUFACTURER_ACCESS_STATUS	0x0006
+#define MANUFACTURER_ACCESS_SLEEP	0x0011
+
+/* battery status value bits */
+#define BATTERY_DISCHARGING		0x40
+#define BATTERY_FULL_CHARGED		0x20
+#define BATTERY_FULL_DISCHARGED		0x10
+
+#define BQ20Z75_DATA(_psp, _addr, _min_value, _max_value) { \
+	.psp = _psp, \
+	.addr = _addr, \
+	.min_value = _min_value, \
+	.max_value = _max_value, \
+}
+
+static const struct bq20z75_device_data {
+	enum power_supply_property psp;
+	u8 addr;
+	int min_value;
+	int max_value;
+} bq20z75_data[] = {
+	[REG_MANUFACTURER_DATA] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_PRESENT, 0x00, 0, 65535),
+	[REG_TEMPERATURE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_TEMP, 0x08, 0, 65535),
+	[REG_VOLTAGE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_VOLTAGE_NOW, 0x09, 0, 20000),
+	[REG_CURRENT] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CURRENT_NOW, 0x0A, -32768,
+			32767),
+	[REG_CAPACITY] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CAPACITY, 0x0E, 0, 100),
+	[REG_REMAINING_CAPACITY] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_NOW, 0x0F, 0, 65535),
+	[REG_REMAINING_CAPACITY_CHARGE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_NOW, 0x0F, 0, 65535),
+	[REG_FULL_CHARGE_CAPACITY] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_FULL, 0x10, 0, 65535),
+	[REG_FULL_CHARGE_CAPACITY_CHARGE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_FULL, 0x10, 0, 65535),
+	[REG_TIME_TO_EMPTY] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG, 0x12, 0,
+			65535),
+	[REG_TIME_TO_FULL] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_TIME_TO_FULL_AVG, 0x13, 0,
+			65535),
+	[REG_STATUS] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_STATUS, 0x16, 0, 65535),
+	[REG_CYCLE_COUNT] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CYCLE_COUNT, 0x17, 0, 65535),
+	[REG_DESIGN_CAPACITY] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, 0x18, 0,
+			65535),
+	[REG_DESIGN_CAPACITY_CHARGE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, 0x18, 0,
+			65535),
+	[REG_DESIGN_VOLTAGE] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, 0x19, 0,
+			65535),
+	[REG_SERIAL_NUMBER] =
+		BQ20Z75_DATA(POWER_SUPPLY_PROP_SERIAL_NUMBER, 0x1C, 0, 65535),
+};
+
+static enum power_supply_property bq20z75_properties[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_CYCLE_COUNT,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_TEMP,
+	POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG,
+	POWER_SUPPLY_PROP_TIME_TO_FULL_AVG,
+	POWER_SUPPLY_PROP_SERIAL_NUMBER,
+	POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN,
+	POWER_SUPPLY_PROP_ENERGY_NOW,
+	POWER_SUPPLY_PROP_ENERGY_FULL,
+	POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN,
+	POWER_SUPPLY_PROP_CHARGE_NOW,
+	POWER_SUPPLY_PROP_CHARGE_FULL,
+	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
+};
+
+struct bq20z75_info {
+	struct i2c_client		*client;
+	struct power_supply		power_supply;
+	struct bq20z75_platform_data	*pdata;
+	bool				is_present;
+	bool				gpio_detect;
+	bool				enable_detection;
+	int				irq;
+	int				last_state;
+	int				poll_time;
+	struct delayed_work		work;
+	int				ignore_changes;
+};
+
+static int bq20z75_read_word_data(struct i2c_client *client, u8 address)
+{
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	s32 ret = 0;
+	int retries = 1;
+
+	if (bq20z75_device->pdata)
+		retries = max(bq20z75_device->pdata->i2c_retry_count + 1, 1);
+
+	while (retries > 0) {
+		ret = i2c_smbus_read_word_data(client, address);
+		if (ret >= 0)
+			break;
+		retries--;
+	}
+
+	if (ret < 0) {
+		dev_dbg(&client->dev,
+			"%s: i2c read at address 0x%x failed\n",
+			__func__, address);
+		return ret;
+	}
+
+	return le16_to_cpu(ret);
+}
+
+static int bq20z75_write_word_data(struct i2c_client *client, u8 address,
+	u16 value)
+{
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	s32 ret = 0;
+	int retries = 1;
+
+	if (bq20z75_device->pdata)
+		retries = max(bq20z75_device->pdata->i2c_retry_count + 1, 1);
+
+	while (retries > 0) {
+		ret = i2c_smbus_write_word_data(client, address,
+			le16_to_cpu(value));
+		if (ret >= 0)
+			break;
+		retries--;
+	}
+
+	if (ret < 0) {
+		dev_dbg(&client->dev,
+			"%s: i2c write to address 0x%x failed\n",
+			__func__, address);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int bq20z75_get_battery_presence_and_health(
+	struct i2c_client *client, enum power_supply_property psp,
+	union power_supply_propval *val)
+{
+	s32 ret;
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+
+	if (psp == POWER_SUPPLY_PROP_PRESENT &&
+		bq20z75_device->gpio_detect) {
+		ret = gpio_get_value(
+			bq20z75_device->pdata->battery_detect);
+		if (ret == bq20z75_device->pdata->battery_detect_present)
+			val->intval = 1;
+		else
+			val->intval = 0;
+		bq20z75_device->is_present = val->intval;
+		return ret;
+	}
+
+	/* Write to ManufacturerAccess with
+	 * ManufacturerAccess command and then
+	 * read the status */
+	ret = bq20z75_write_word_data(client,
+		bq20z75_data[REG_MANUFACTURER_DATA].addr,
+		MANUFACTURER_ACCESS_STATUS);
+	if (ret < 0) {
+		if (psp == POWER_SUPPLY_PROP_PRESENT)
+			val->intval = 0; /* battery removed */
+		return ret;
+	}
+
+	ret = bq20z75_read_word_data(client,
+		bq20z75_data[REG_MANUFACTURER_DATA].addr);
+	if (ret < 0)
+		return ret;
+
+	if (ret < bq20z75_data[REG_MANUFACTURER_DATA].min_value ||
+	    ret > bq20z75_data[REG_MANUFACTURER_DATA].max_value) {
+		val->intval = 0;
+		return 0;
+	}
+
+	/* Mask the upper nibble of 2nd byte and
+	 * lower byte of response then
+	 * shift the result by 8 to get status*/
+	ret &= 0x0F00;
+	ret >>= 8;
+	if (psp == POWER_SUPPLY_PROP_PRESENT) {
+		if (ret == 0x0F)
+			/* battery removed */
+			val->intval = 0;
+		else
+			val->intval = 1;
+	} else if (psp == POWER_SUPPLY_PROP_HEALTH) {
+		if (ret == 0x09)
+			val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
+		else if (ret == 0x0B)
+			val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
+		else if (ret == 0x0C)
+			val->intval = POWER_SUPPLY_HEALTH_DEAD;
+		else
+			val->intval = POWER_SUPPLY_HEALTH_GOOD;
+	}
+
+	return 0;
+}
+
+static int bq20z75_get_battery_property(struct i2c_client *client,
+	int reg_offset, enum power_supply_property psp,
+	union power_supply_propval *val)
+{
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	s32 ret;
+
+	ret = bq20z75_read_word_data(client,
+		bq20z75_data[reg_offset].addr);
+	if (ret < 0)
+		return ret;
+
+	/* returned values are 16 bit */
+	if (bq20z75_data[reg_offset].min_value < 0)
+		ret = (s16)ret;
+
+	if (ret >= bq20z75_data[reg_offset].min_value &&
+	    ret <= bq20z75_data[reg_offset].max_value) {
+		val->intval = ret;
+		if (psp != POWER_SUPPLY_PROP_STATUS)
+			return 0;
+
+		if (ret & BATTERY_FULL_CHARGED)
+			val->intval = POWER_SUPPLY_STATUS_FULL;
+		else if (ret & BATTERY_FULL_DISCHARGED)
+			val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		else if (ret & BATTERY_DISCHARGING)
+			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		else
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+
+		if (bq20z75_device->poll_time == 0)
+			bq20z75_device->last_state = val->intval;
+		else if (bq20z75_device->last_state != val->intval) {
+			cancel_delayed_work_sync(&bq20z75_device->work);
+			power_supply_changed(&bq20z75_device->power_supply);
+			bq20z75_device->poll_time = 0;
+		}
+	} else {
+		if (psp == POWER_SUPPLY_PROP_STATUS)
+			val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
+		else
+			val->intval = 0;
+	}
+
+	return 0;
+}
+
+static void  bq20z75_unit_adjustment(struct i2c_client *client,
+	enum power_supply_property psp, union power_supply_propval *val)
+{
+#define BASE_UNIT_CONVERSION		1000
+#define BATTERY_MODE_CAP_MULT_WATT	(10 * BASE_UNIT_CONVERSION)
+#define TIME_UNIT_CONVERSION		60
+#define TEMP_KELVIN_TO_CELSIUS		2731
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ENERGY_NOW:
+	case POWER_SUPPLY_PROP_ENERGY_FULL:
+	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+		/* bq20z75 provides energy in units of 10mWh.
+		 * Convert to µWh
+		 */
+		val->intval *= BATTERY_MODE_CAP_MULT_WATT;
+		break;
+
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+		val->intval *= BASE_UNIT_CONVERSION;
+		break;
+
+	case POWER_SUPPLY_PROP_TEMP:
+		/* bq20z75 provides battery temperature in 0.1K
+		 * so convert it to 0.1°C
+		 */
+		val->intval -= TEMP_KELVIN_TO_CELSIUS;
+		break;
+
+	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
+	case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
+		/* bq20z75 provides time to empty and time to full in minutes.
+		 * Convert to seconds
+		 */
+		val->intval *= TIME_UNIT_CONVERSION;
+		break;
+
+	default:
+		dev_dbg(&client->dev,
+			"%s: no need for unit conversion %d\n", __func__, psp);
+	}
+}
+
+static enum bq20z75_battery_mode
+bq20z75_set_battery_mode(struct i2c_client *client,
+	enum bq20z75_battery_mode mode)
+{
+	int ret, original_val;
+
+	original_val = bq20z75_read_word_data(client, BATTERY_MODE_OFFSET);
+	if (original_val < 0)
+		return original_val;
+
+	if ((original_val & BATTERY_MODE_MASK) == mode)
+		return mode;
+
+	if (mode == BATTERY_MODE_AMPS)
+		ret = original_val & ~BATTERY_MODE_MASK;
+	else
+		ret = original_val | BATTERY_MODE_MASK;
+
+	ret = bq20z75_write_word_data(client, BATTERY_MODE_OFFSET, ret);
+	if (ret < 0)
+		return ret;
+
+	return original_val & BATTERY_MODE_MASK;
+}
+
+static int bq20z75_get_battery_capacity(struct i2c_client *client,
+	int reg_offset, enum power_supply_property psp,
+	union power_supply_propval *val)
+{
+	s32 ret;
+	enum bq20z75_battery_mode mode = BATTERY_MODE_WATTS;
+
+	if (power_supply_is_amp_property(psp))
+		mode = BATTERY_MODE_AMPS;
+
+	mode = bq20z75_set_battery_mode(client, mode);
+	if (mode < 0)
+		return mode;
+
+	ret = bq20z75_read_word_data(client, bq20z75_data[reg_offset].addr);
+	if (ret < 0)
+		return ret;
+
+	if (psp == POWER_SUPPLY_PROP_CAPACITY) {
+		/* bq20z75 spec says that this can be >100 %
+		* even if max value is 100 % */
+		val->intval = min(ret, 100);
+	} else
+		val->intval = ret;
+
+	ret = bq20z75_set_battery_mode(client, mode);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static char bq20z75_serial[5];
+static int bq20z75_get_battery_serial_number(struct i2c_client *client,
+	union power_supply_propval *val)
+{
+	int ret;
+
+	ret = bq20z75_read_word_data(client,
+		bq20z75_data[REG_SERIAL_NUMBER].addr);
+	if (ret < 0)
+		return ret;
+
+	ret = sprintf(bq20z75_serial, "%04x", ret);
+	val->strval = bq20z75_serial;
+
+	return 0;
+}
+
+static int bq20z75_get_property_index(struct i2c_client *client,
+	enum power_supply_property psp)
+{
+	int count;
+	for (count = 0; count < ARRAY_SIZE(bq20z75_data); count++)
+		if (psp == bq20z75_data[count].psp)
+			return count;
+
+	dev_warn(&client->dev,
+		"%s: Invalid Property - %d\n", __func__, psp);
+
+	return -EINVAL;
+}
+
+static int bq20z75_get_property(struct power_supply *psy,
+	enum power_supply_property psp,
+	union power_supply_propval *val)
+{
+	int ret = 0;
+	struct bq20z75_info *bq20z75_device = container_of(psy,
+				struct bq20z75_info, power_supply);
+	struct i2c_client *client = bq20z75_device->client;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_PRESENT:
+	case POWER_SUPPLY_PROP_HEALTH:
+		ret = bq20z75_get_battery_presence_and_health(client, psp, val);
+		if (psp == POWER_SUPPLY_PROP_PRESENT)
+			return 0;
+		break;
+
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = POWER_SUPPLY_TECHNOLOGY_LION;
+		break;
+
+	case POWER_SUPPLY_PROP_ENERGY_NOW:
+	case POWER_SUPPLY_PROP_ENERGY_FULL:
+	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+	case POWER_SUPPLY_PROP_CAPACITY:
+		ret = bq20z75_get_property_index(client, psp);
+		if (ret < 0)
+			break;
+
+		ret = bq20z75_get_battery_capacity(client, ret, psp, val);
+		break;
+
+	case POWER_SUPPLY_PROP_SERIAL_NUMBER:
+		ret = bq20z75_get_battery_serial_number(client, val);
+		break;
+
+	case POWER_SUPPLY_PROP_STATUS:
+	case POWER_SUPPLY_PROP_CYCLE_COUNT:
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+	case POWER_SUPPLY_PROP_TEMP:
+	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
+	case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
+		ret = bq20z75_get_property_index(client, psp);
+		if (ret < 0)
+			break;
+
+		ret = bq20z75_get_battery_property(client, ret, psp, val);
+		break;
+
+	default:
+		dev_err(&client->dev,
+			"%s: INVALID property\n", __func__);
+		return -EINVAL;
+	}
+
+	if (!bq20z75_device->enable_detection)
+		goto done;
+
+	if (!bq20z75_device->gpio_detect &&
+		bq20z75_device->is_present != (ret >= 0)) {
+		bq20z75_device->is_present = (ret >= 0);
+		power_supply_changed(&bq20z75_device->power_supply);
+	}
+
+done:
+	if (!ret) {
+		/* Convert units to match requirements for power supply class */
+		bq20z75_unit_adjustment(client, psp, val);
+	}
+
+	dev_dbg(&client->dev,
+		"%s: property = %d, value = %x\n", __func__, psp, val->intval);
+
+	if (ret && bq20z75_device->is_present)
+		return ret;
+
+	/* battery not present, so return NODATA for properties */
+	if (ret)
+		return -ENODATA;
+
+	return 0;
+}
+
+static irqreturn_t bq20z75_irq(int irq, void *devid)
+{
+	struct power_supply *battery = devid;
+
+	power_supply_changed(battery);
+
+	return IRQ_HANDLED;
+}
+
+static void bq20z75_external_power_changed(struct power_supply *psy)
+{
+	struct bq20z75_info *bq20z75_device;
+
+	bq20z75_device = container_of(psy, struct bq20z75_info, power_supply);
+
+	if (bq20z75_device->ignore_changes > 0) {
+		bq20z75_device->ignore_changes--;
+		return;
+	}
+
+	/* cancel outstanding work */
+	cancel_delayed_work_sync(&bq20z75_device->work);
+
+	schedule_delayed_work(&bq20z75_device->work, HZ);
+	bq20z75_device->poll_time = bq20z75_device->pdata->poll_retry_count;
+}
+
+static void bq20z75_delayed_work(struct work_struct *work)
+{
+	struct bq20z75_info *bq20z75_device;
+	s32 ret;
+
+	bq20z75_device = container_of(work, struct bq20z75_info, work.work);
+
+	ret = bq20z75_read_word_data(bq20z75_device->client,
+				     bq20z75_data[REG_STATUS].addr);
+	/* if the read failed, give up on this work */
+	if (ret < 0) {
+		bq20z75_device->poll_time = 0;
+		return;
+	}
+
+	if (ret & BATTERY_FULL_CHARGED)
+		ret = POWER_SUPPLY_STATUS_FULL;
+	else if (ret & BATTERY_FULL_DISCHARGED)
+		ret = POWER_SUPPLY_STATUS_NOT_CHARGING;
+	else if (ret & BATTERY_DISCHARGING)
+		ret = POWER_SUPPLY_STATUS_DISCHARGING;
+	else
+		ret = POWER_SUPPLY_STATUS_CHARGING;
+
+	if (bq20z75_device->last_state != ret) {
+		bq20z75_device->poll_time = 0;
+		power_supply_changed(&bq20z75_device->power_supply);
+		return;
+	}
+	if (bq20z75_device->poll_time > 0) {
+		schedule_delayed_work(&bq20z75_device->work, HZ);
+		bq20z75_device->poll_time--;
+		return;
+	}
+}
+
+#if defined(CONFIG_OF)
+
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+
+static const struct of_device_id bq20z75_dt_ids[] = {
+	{ .compatible = "ti,bq20z75" },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, bq20z75_dt_ids);
+
+static struct bq20z75_platform_data *bq20z75_of_populate_pdata(
+		struct i2c_client *client)
+{
+	struct device_node *of_node = client->dev.of_node;
+	struct bq20z75_platform_data *pdata = client->dev.platform_data;
+	enum of_gpio_flags gpio_flags;
+	int rc;
+	u32 prop;
+
+	/* verify this driver matches this device */
+	if (!of_node)
+		return NULL;
+
+	/* if platform data is set, honor it */
+	if (pdata)
+		return pdata;
+
+	/* first make sure at least one property is set, otherwise
+	 * it won't change behavior from running without pdata.
+	 */
+	if (!of_get_property(of_node, "ti,i2c-retry-count", NULL) &&
+		!of_get_property(of_node, "ti,poll-retry-count", NULL) &&
+		!of_get_property(of_node, "ti,battery-detect-gpios", NULL))
+		goto of_out;
+
+	pdata = devm_kzalloc(&client->dev, sizeof(struct bq20z75_platform_data),
+				GFP_KERNEL);
+	if (!pdata)
+		goto of_out;
+
+	rc = of_property_read_u32(of_node, "ti,i2c-retry-count", &prop);
+	if (!rc)
+		pdata->i2c_retry_count = prop;
+
+	rc = of_property_read_u32(of_node, "ti,poll-retry-count", &prop);
+	if (!rc)
+		pdata->poll_retry_count = prop;
+
+	if (!of_get_property(of_node, "ti,battery-detect-gpios", NULL)) {
+		pdata->battery_detect = -1;
+		goto of_out;
+	}
+
+	pdata->battery_detect = of_get_named_gpio_flags(of_node,
+			"ti,battery-detect-gpios", 0, &gpio_flags);
+
+	if (gpio_flags & OF_GPIO_ACTIVE_LOW)
+		pdata->battery_detect_present = 0;
+	else
+		pdata->battery_detect_present = 1;
+
+of_out:
+	return pdata;
+}
+#else
+#define bq20z75_dt_ids NULL
+static struct bq20z75_platform_data *bq20z75_of_populate_pdata(
+	struct i2c_client *client)
+{
+	return client->dev.platform_data;
+}
+#endif
+
+static int __devinit bq20z75_probe(struct i2c_client *client,
+	const struct i2c_device_id *id)
+{
+	struct bq20z75_info *bq20z75_device;
+	struct bq20z75_platform_data *pdata = client->dev.platform_data;
+	int rc;
+	int irq;
+
+	bq20z75_device = kzalloc(sizeof(struct bq20z75_info), GFP_KERNEL);
+	if (!bq20z75_device)
+		return -ENOMEM;
+
+	bq20z75_device->client = client;
+	bq20z75_device->enable_detection = false;
+	bq20z75_device->gpio_detect = false;
+	bq20z75_device->power_supply.name = "battery";
+	bq20z75_device->power_supply.type = POWER_SUPPLY_TYPE_BATTERY;
+	bq20z75_device->power_supply.properties = bq20z75_properties;
+	bq20z75_device->power_supply.num_properties =
+		ARRAY_SIZE(bq20z75_properties);
+	bq20z75_device->power_supply.get_property = bq20z75_get_property;
+	/* ignore first notification of external change, it is generated
+	 * from the power_supply_register call back
+	 */
+	bq20z75_device->ignore_changes = 1;
+	bq20z75_device->last_state = POWER_SUPPLY_STATUS_UNKNOWN;
+	bq20z75_device->power_supply.external_power_changed =
+		bq20z75_external_power_changed;
+
+	pdata = bq20z75_of_populate_pdata(client);
+
+	if (pdata) {
+		bq20z75_device->gpio_detect =
+			gpio_is_valid(pdata->battery_detect);
+		bq20z75_device->pdata = pdata;
+	}
+
+	i2c_set_clientdata(client, bq20z75_device);
+
+	if (!bq20z75_device->gpio_detect)
+		goto skip_gpio;
+
+	rc = gpio_request(pdata->battery_detect, dev_name(&client->dev));
+	if (rc) {
+		dev_warn(&client->dev, "Failed to request gpio: %d\n", rc);
+		bq20z75_device->gpio_detect = false;
+		goto skip_gpio;
+	}
+
+	rc = gpio_direction_input(pdata->battery_detect);
+	if (rc) {
+		dev_warn(&client->dev, "Failed to get gpio as input: %d\n", rc);
+		gpio_free(pdata->battery_detect);
+		bq20z75_device->gpio_detect = false;
+		goto skip_gpio;
+	}
+
+	irq = gpio_to_irq(pdata->battery_detect);
+	if (irq <= 0) {
+		dev_warn(&client->dev, "Failed to get gpio as irq: %d\n", irq);
+		gpio_free(pdata->battery_detect);
+		bq20z75_device->gpio_detect = false;
+		goto skip_gpio;
+	}
+
+	rc = request_irq(irq, bq20z75_irq,
+		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+		dev_name(&client->dev), &bq20z75_device->power_supply);
+	if (rc) {
+		dev_warn(&client->dev, "Failed to request irq: %d\n", rc);
+		gpio_free(pdata->battery_detect);
+		bq20z75_device->gpio_detect = false;
+		goto skip_gpio;
+	}
+
+	bq20z75_device->irq = irq;
+
+skip_gpio:
+
+	rc = power_supply_register(&client->dev, &bq20z75_device->power_supply);
+	if (rc) {
+		dev_err(&client->dev,
+			"%s: Failed to register power supply\n", __func__);
+		goto exit_psupply;
+	}
+
+	dev_info(&client->dev,
+		"%s: battery gas gauge device registered\n", client->name);
+
+	INIT_DELAYED_WORK(&bq20z75_device->work, bq20z75_delayed_work);
+
+	bq20z75_device->enable_detection = true;
+
+	return 0;
+
+exit_psupply:
+	if (bq20z75_device->irq)
+		free_irq(bq20z75_device->irq, &bq20z75_device->power_supply);
+	if (bq20z75_device->gpio_detect)
+		gpio_free(pdata->battery_detect);
+
+	kfree(bq20z75_device);
+
+	return rc;
+}
+
+static int __devexit bq20z75_remove(struct i2c_client *client)
+{
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+
+	if (bq20z75_device->irq)
+		free_irq(bq20z75_device->irq, &bq20z75_device->power_supply);
+	if (bq20z75_device->gpio_detect)
+		gpio_free(bq20z75_device->pdata->battery_detect);
+
+	power_supply_unregister(&bq20z75_device->power_supply);
+
+	cancel_delayed_work_sync(&bq20z75_device->work);
+
+	kfree(bq20z75_device);
+	bq20z75_device = NULL;
+
+	return 0;
+}
+
+#if defined CONFIG_PM
+static int bq20z75_suspend(struct i2c_client *client,
+	pm_message_t state)
+{
+	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	s32 ret;
+
+	if (bq20z75_device->poll_time > 0)
+		cancel_delayed_work_sync(&bq20z75_device->work);
+
+	/* write to manufacturer access with sleep command */
+	ret = bq20z75_write_word_data(client,
+		bq20z75_data[REG_MANUFACTURER_DATA].addr,
+		MANUFACTURER_ACCESS_SLEEP);
+	if (bq20z75_device->is_present && ret < 0)
+		return ret;
+
+	return 0;
+}
+#else
+#define bq20z75_suspend		NULL
+#endif
+/* any smbus transaction will wake up bq20z75 */
+#define bq20z75_resume		NULL
+
+static const struct i2c_device_id bq20z75_id[] = {
+	{ "bq20z75", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, bq20z75_id);
+
+static struct i2c_driver bq20z75_battery_driver = {
+	.probe		= bq20z75_probe,
+	.remove		= __devexit_p(bq20z75_remove),
+	.suspend	= bq20z75_suspend,
+	.resume		= bq20z75_resume,
+	.id_table	= bq20z75_id,
+	.driver = {
+		.name	= "bq20z75-battery",
+		.of_match_table = bq20z75_dt_ids,
+	},
+};
+
+static int __init bq20z75_battery_init(void)
+{
+	return i2c_add_driver(&bq20z75_battery_driver);
+}
+module_init(bq20z75_battery_init);
+
+static void __exit bq20z75_battery_exit(void)
+{
+	i2c_del_driver(&bq20z75_battery_driver);
+}
+module_exit(bq20z75_battery_exit);
+
+MODULE_DESCRIPTION("BQ20z75 battery monitor driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/power/bq20z75.h b/include/linux/power/bq20z75.h
deleted file mode 100644
index 1398eb004e83..000000000000
--- a/include/linux/power/bq20z75.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Gas Gauge driver for TI's BQ20Z75
- *
- * Copyright (c) 2010, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#ifndef __LINUX_POWER_BQ20Z75_H_
-#define __LINUX_POWER_BQ20Z75_H_
-
-#include <linux/power_supply.h>
-#include <linux/types.h>
-
-/**
- * struct bq20z75_platform_data - platform data for bq20z75 devices
- * @battery_detect:		GPIO which is used to detect battery presence
- * @battery_detect_present:	gpio state when battery is present (0 / 1)
- * @i2c_retry_count:		# of times to retry on i2c IO failure
- * @poll_retry_count:		# of times to retry looking for new status after
- *				external change notification
- */
-struct bq20z75_platform_data {
-	int battery_detect;
-	int battery_detect_present;
-	int i2c_retry_count;
-	int poll_retry_count;
-};
-
-#endif
diff --git a/include/linux/power/sbs-battery.h b/include/linux/power/sbs-battery.h
new file mode 100644
index 000000000000..1398eb004e83
--- /dev/null
+++ b/include/linux/power/sbs-battery.h
@@ -0,0 +1,42 @@
+/*
+ * Gas Gauge driver for TI's BQ20Z75
+ *
+ * Copyright (c) 2010, NVIDIA Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __LINUX_POWER_BQ20Z75_H_
+#define __LINUX_POWER_BQ20Z75_H_
+
+#include <linux/power_supply.h>
+#include <linux/types.h>
+
+/**
+ * struct bq20z75_platform_data - platform data for bq20z75 devices
+ * @battery_detect:		GPIO which is used to detect battery presence
+ * @battery_detect_present:	gpio state when battery is present (0 / 1)
+ * @i2c_retry_count:		# of times to retry on i2c IO failure
+ * @poll_retry_count:		# of times to retry looking for new status after
+ *				external change notification
+ */
+struct bq20z75_platform_data {
+	int battery_detect;
+	int battery_detect_present;
+	int i2c_retry_count;
+	int poll_retry_count;
+};
+
+#endif
-- 
cgit v1.2.3


From 3ddca062f8d71724529b0d52609994c9886f1a18 Mon Sep 17 00:00:00 2001
From: Rhyland Klein <rklein@nvidia.com>
Date: Mon, 5 Dec 2011 17:50:46 -0800
Subject: sbs-battery: Rename internals to new name

Now that this driver is named more generally, this change updates
the internal variables, defines and functions to use this new name.

Signed-off-by: Rhyland Klein <rklein@nvidia.com>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/sbs-battery.c       | 423 ++++++++++++++++++--------------------
 include/linux/power/sbs-battery.h |  10 +-
 2 files changed, 209 insertions(+), 224 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/sbs-battery.c b/drivers/power/sbs-battery.c
index ce95ff791016..00bd9e079e80 100644
--- a/drivers/power/sbs-battery.c
+++ b/drivers/power/sbs-battery.c
@@ -1,5 +1,5 @@
 /*
- * Gas Gauge driver for TI's BQ20Z75
+ * Gas Gauge driver for SBS Compliant Batteries
  *
  * Copyright (c) 2010, NVIDIA Corporation.
  *
@@ -28,7 +28,7 @@
 #include <linux/interrupt.h>
 #include <linux/gpio.h>
 
-#include <linux/power/bq20z75.h>
+#include <linux/power/sbs-battery.h>
 
 enum {
 	REG_MANUFACTURER_DATA,
@@ -53,7 +53,7 @@ enum {
 /* Battery Mode defines */
 #define BATTERY_MODE_OFFSET		0x03
 #define BATTERY_MODE_MASK		0x8000
-enum bq20z75_battery_mode {
+enum sbs_battery_mode {
 	BATTERY_MODE_AMPS,
 	BATTERY_MODE_WATTS
 };
@@ -67,62 +67,56 @@ enum bq20z75_battery_mode {
 #define BATTERY_FULL_CHARGED		0x20
 #define BATTERY_FULL_DISCHARGED		0x10
 
-#define BQ20Z75_DATA(_psp, _addr, _min_value, _max_value) { \
+#define SBS_DATA(_psp, _addr, _min_value, _max_value) { \
 	.psp = _psp, \
 	.addr = _addr, \
 	.min_value = _min_value, \
 	.max_value = _max_value, \
 }
 
-static const struct bq20z75_device_data {
+static const struct chip_data {
 	enum power_supply_property psp;
 	u8 addr;
 	int min_value;
 	int max_value;
-} bq20z75_data[] = {
+} sbs_data[] = {
 	[REG_MANUFACTURER_DATA] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_PRESENT, 0x00, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_PRESENT, 0x00, 0, 65535),
 	[REG_TEMPERATURE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_TEMP, 0x08, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_TEMP, 0x08, 0, 65535),
 	[REG_VOLTAGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_VOLTAGE_NOW, 0x09, 0, 20000),
+		SBS_DATA(POWER_SUPPLY_PROP_VOLTAGE_NOW, 0x09, 0, 20000),
 	[REG_CURRENT] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CURRENT_NOW, 0x0A, -32768,
-			32767),
+		SBS_DATA(POWER_SUPPLY_PROP_CURRENT_NOW, 0x0A, -32768, 32767),
 	[REG_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CAPACITY, 0x0E, 0, 100),
+		SBS_DATA(POWER_SUPPLY_PROP_CAPACITY, 0x0E, 0, 100),
 	[REG_REMAINING_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_NOW, 0x0F, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_ENERGY_NOW, 0x0F, 0, 65535),
 	[REG_REMAINING_CAPACITY_CHARGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_NOW, 0x0F, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_CHARGE_NOW, 0x0F, 0, 65535),
 	[REG_FULL_CHARGE_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_FULL, 0x10, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_ENERGY_FULL, 0x10, 0, 65535),
 	[REG_FULL_CHARGE_CAPACITY_CHARGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_FULL, 0x10, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_CHARGE_FULL, 0x10, 0, 65535),
 	[REG_TIME_TO_EMPTY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG, 0x12, 0,
-			65535),
+		SBS_DATA(POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG, 0x12, 0, 65535),
 	[REG_TIME_TO_FULL] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_TIME_TO_FULL_AVG, 0x13, 0,
-			65535),
+		SBS_DATA(POWER_SUPPLY_PROP_TIME_TO_FULL_AVG, 0x13, 0, 65535),
 	[REG_STATUS] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_STATUS, 0x16, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_STATUS, 0x16, 0, 65535),
 	[REG_CYCLE_COUNT] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CYCLE_COUNT, 0x17, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_CYCLE_COUNT, 0x17, 0, 65535),
 	[REG_DESIGN_CAPACITY] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, 0x18, 0,
-			65535),
+		SBS_DATA(POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, 0x18, 0, 65535),
 	[REG_DESIGN_CAPACITY_CHARGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, 0x18, 0,
-			65535),
+		SBS_DATA(POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, 0x18, 0, 65535),
 	[REG_DESIGN_VOLTAGE] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, 0x19, 0,
-			65535),
+		SBS_DATA(POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, 0x19, 0, 65535),
 	[REG_SERIAL_NUMBER] =
-		BQ20Z75_DATA(POWER_SUPPLY_PROP_SERIAL_NUMBER, 0x1C, 0, 65535),
+		SBS_DATA(POWER_SUPPLY_PROP_SERIAL_NUMBER, 0x1C, 0, 65535),
 };
 
-static enum power_supply_property bq20z75_properties[] = {
+static enum power_supply_property sbs_properties[] = {
 	POWER_SUPPLY_PROP_STATUS,
 	POWER_SUPPLY_PROP_HEALTH,
 	POWER_SUPPLY_PROP_PRESENT,
@@ -144,10 +138,10 @@ static enum power_supply_property bq20z75_properties[] = {
 	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
 };
 
-struct bq20z75_info {
+struct sbs_info {
 	struct i2c_client		*client;
 	struct power_supply		power_supply;
-	struct bq20z75_platform_data	*pdata;
+	struct sbs_platform_data	*pdata;
 	bool				is_present;
 	bool				gpio_detect;
 	bool				enable_detection;
@@ -158,14 +152,14 @@ struct bq20z75_info {
 	int				ignore_changes;
 };
 
-static int bq20z75_read_word_data(struct i2c_client *client, u8 address)
+static int sbs_read_word_data(struct i2c_client *client, u8 address)
 {
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	struct sbs_info *chip = i2c_get_clientdata(client);
 	s32 ret = 0;
 	int retries = 1;
 
-	if (bq20z75_device->pdata)
-		retries = max(bq20z75_device->pdata->i2c_retry_count + 1, 1);
+	if (chip->pdata)
+		retries = max(chip->pdata->i2c_retry_count + 1, 1);
 
 	while (retries > 0) {
 		ret = i2c_smbus_read_word_data(client, address);
@@ -184,15 +178,15 @@ static int bq20z75_read_word_data(struct i2c_client *client, u8 address)
 	return le16_to_cpu(ret);
 }
 
-static int bq20z75_write_word_data(struct i2c_client *client, u8 address,
+static int sbs_write_word_data(struct i2c_client *client, u8 address,
 	u16 value)
 {
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	struct sbs_info *chip = i2c_get_clientdata(client);
 	s32 ret = 0;
 	int retries = 1;
 
-	if (bq20z75_device->pdata)
-		retries = max(bq20z75_device->pdata->i2c_retry_count + 1, 1);
+	if (chip->pdata)
+		retries = max(chip->pdata->i2c_retry_count + 1, 1);
 
 	while (retries > 0) {
 		ret = i2c_smbus_write_word_data(client, address,
@@ -212,44 +206,41 @@ static int bq20z75_write_word_data(struct i2c_client *client, u8 address,
 	return 0;
 }
 
-static int bq20z75_get_battery_presence_and_health(
+static int sbs_get_battery_presence_and_health(
 	struct i2c_client *client, enum power_supply_property psp,
 	union power_supply_propval *val)
 {
 	s32 ret;
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	struct sbs_info *chip = i2c_get_clientdata(client);
 
 	if (psp == POWER_SUPPLY_PROP_PRESENT &&
-		bq20z75_device->gpio_detect) {
-		ret = gpio_get_value(
-			bq20z75_device->pdata->battery_detect);
-		if (ret == bq20z75_device->pdata->battery_detect_present)
+		chip->gpio_detect) {
+		ret = gpio_get_value(chip->pdata->battery_detect);
+		if (ret == chip->pdata->battery_detect_present)
 			val->intval = 1;
 		else
 			val->intval = 0;
-		bq20z75_device->is_present = val->intval;
+		chip->is_present = val->intval;
 		return ret;
 	}
 
 	/* Write to ManufacturerAccess with
 	 * ManufacturerAccess command and then
 	 * read the status */
-	ret = bq20z75_write_word_data(client,
-		bq20z75_data[REG_MANUFACTURER_DATA].addr,
-		MANUFACTURER_ACCESS_STATUS);
+	ret = sbs_write_word_data(client, sbs_data[REG_MANUFACTURER_DATA].addr,
+					MANUFACTURER_ACCESS_STATUS);
 	if (ret < 0) {
 		if (psp == POWER_SUPPLY_PROP_PRESENT)
 			val->intval = 0; /* battery removed */
 		return ret;
 	}
 
-	ret = bq20z75_read_word_data(client,
-		bq20z75_data[REG_MANUFACTURER_DATA].addr);
+	ret = sbs_read_word_data(client, sbs_data[REG_MANUFACTURER_DATA].addr);
 	if (ret < 0)
 		return ret;
 
-	if (ret < bq20z75_data[REG_MANUFACTURER_DATA].min_value ||
-	    ret > bq20z75_data[REG_MANUFACTURER_DATA].max_value) {
+	if (ret < sbs_data[REG_MANUFACTURER_DATA].min_value ||
+	    ret > sbs_data[REG_MANUFACTURER_DATA].max_value) {
 		val->intval = 0;
 		return 0;
 	}
@@ -279,24 +270,23 @@ static int bq20z75_get_battery_presence_and_health(
 	return 0;
 }
 
-static int bq20z75_get_battery_property(struct i2c_client *client,
+static int sbs_get_battery_property(struct i2c_client *client,
 	int reg_offset, enum power_supply_property psp,
 	union power_supply_propval *val)
 {
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	struct sbs_info *chip = i2c_get_clientdata(client);
 	s32 ret;
 
-	ret = bq20z75_read_word_data(client,
-		bq20z75_data[reg_offset].addr);
+	ret = sbs_read_word_data(client, sbs_data[reg_offset].addr);
 	if (ret < 0)
 		return ret;
 
 	/* returned values are 16 bit */
-	if (bq20z75_data[reg_offset].min_value < 0)
+	if (sbs_data[reg_offset].min_value < 0)
 		ret = (s16)ret;
 
-	if (ret >= bq20z75_data[reg_offset].min_value &&
-	    ret <= bq20z75_data[reg_offset].max_value) {
+	if (ret >= sbs_data[reg_offset].min_value &&
+	    ret <= sbs_data[reg_offset].max_value) {
 		val->intval = ret;
 		if (psp != POWER_SUPPLY_PROP_STATUS)
 			return 0;
@@ -310,12 +300,12 @@ static int bq20z75_get_battery_property(struct i2c_client *client,
 		else
 			val->intval = POWER_SUPPLY_STATUS_CHARGING;
 
-		if (bq20z75_device->poll_time == 0)
-			bq20z75_device->last_state = val->intval;
-		else if (bq20z75_device->last_state != val->intval) {
-			cancel_delayed_work_sync(&bq20z75_device->work);
-			power_supply_changed(&bq20z75_device->power_supply);
-			bq20z75_device->poll_time = 0;
+		if (chip->poll_time == 0)
+			chip->last_state = val->intval;
+		else if (chip->last_state != val->intval) {
+			cancel_delayed_work_sync(&chip->work);
+			power_supply_changed(&chip->power_supply);
+			chip->poll_time = 0;
 		}
 	} else {
 		if (psp == POWER_SUPPLY_PROP_STATUS)
@@ -327,7 +317,7 @@ static int bq20z75_get_battery_property(struct i2c_client *client,
 	return 0;
 }
 
-static void  bq20z75_unit_adjustment(struct i2c_client *client,
+static void  sbs_unit_adjustment(struct i2c_client *client,
 	enum power_supply_property psp, union power_supply_propval *val)
 {
 #define BASE_UNIT_CONVERSION		1000
@@ -338,7 +328,7 @@ static void  bq20z75_unit_adjustment(struct i2c_client *client,
 	case POWER_SUPPLY_PROP_ENERGY_NOW:
 	case POWER_SUPPLY_PROP_ENERGY_FULL:
 	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
-		/* bq20z75 provides energy in units of 10mWh.
+		/* sbs provides energy in units of 10mWh.
 		 * Convert to µWh
 		 */
 		val->intval *= BATTERY_MODE_CAP_MULT_WATT;
@@ -354,7 +344,7 @@ static void  bq20z75_unit_adjustment(struct i2c_client *client,
 		break;
 
 	case POWER_SUPPLY_PROP_TEMP:
-		/* bq20z75 provides battery temperature in 0.1K
+		/* sbs provides battery temperature in 0.1K
 		 * so convert it to 0.1°C
 		 */
 		val->intval -= TEMP_KELVIN_TO_CELSIUS;
@@ -362,7 +352,7 @@ static void  bq20z75_unit_adjustment(struct i2c_client *client,
 
 	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
 	case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
-		/* bq20z75 provides time to empty and time to full in minutes.
+		/* sbs provides time to empty and time to full in minutes.
 		 * Convert to seconds
 		 */
 		val->intval *= TIME_UNIT_CONVERSION;
@@ -374,13 +364,12 @@ static void  bq20z75_unit_adjustment(struct i2c_client *client,
 	}
 }
 
-static enum bq20z75_battery_mode
-bq20z75_set_battery_mode(struct i2c_client *client,
-	enum bq20z75_battery_mode mode)
+static enum sbs_battery_mode sbs_set_battery_mode(struct i2c_client *client,
+	enum sbs_battery_mode mode)
 {
 	int ret, original_val;
 
-	original_val = bq20z75_read_word_data(client, BATTERY_MODE_OFFSET);
+	original_val = sbs_read_word_data(client, BATTERY_MODE_OFFSET);
 	if (original_val < 0)
 		return original_val;
 
@@ -392,68 +381,67 @@ bq20z75_set_battery_mode(struct i2c_client *client,
 	else
 		ret = original_val | BATTERY_MODE_MASK;
 
-	ret = bq20z75_write_word_data(client, BATTERY_MODE_OFFSET, ret);
+	ret = sbs_write_word_data(client, BATTERY_MODE_OFFSET, ret);
 	if (ret < 0)
 		return ret;
 
 	return original_val & BATTERY_MODE_MASK;
 }
 
-static int bq20z75_get_battery_capacity(struct i2c_client *client,
+static int sbs_get_battery_capacity(struct i2c_client *client,
 	int reg_offset, enum power_supply_property psp,
 	union power_supply_propval *val)
 {
 	s32 ret;
-	enum bq20z75_battery_mode mode = BATTERY_MODE_WATTS;
+	enum sbs_battery_mode mode = BATTERY_MODE_WATTS;
 
 	if (power_supply_is_amp_property(psp))
 		mode = BATTERY_MODE_AMPS;
 
-	mode = bq20z75_set_battery_mode(client, mode);
+	mode = sbs_set_battery_mode(client, mode);
 	if (mode < 0)
 		return mode;
 
-	ret = bq20z75_read_word_data(client, bq20z75_data[reg_offset].addr);
+	ret = sbs_read_word_data(client, sbs_data[reg_offset].addr);
 	if (ret < 0)
 		return ret;
 
 	if (psp == POWER_SUPPLY_PROP_CAPACITY) {
-		/* bq20z75 spec says that this can be >100 %
+		/* sbs spec says that this can be >100 %
 		* even if max value is 100 % */
 		val->intval = min(ret, 100);
 	} else
 		val->intval = ret;
 
-	ret = bq20z75_set_battery_mode(client, mode);
+	ret = sbs_set_battery_mode(client, mode);
 	if (ret < 0)
 		return ret;
 
 	return 0;
 }
 
-static char bq20z75_serial[5];
-static int bq20z75_get_battery_serial_number(struct i2c_client *client,
+static char sbs_serial[5];
+static int sbs_get_battery_serial_number(struct i2c_client *client,
 	union power_supply_propval *val)
 {
 	int ret;
 
-	ret = bq20z75_read_word_data(client,
-		bq20z75_data[REG_SERIAL_NUMBER].addr);
+	ret = sbs_read_word_data(client, sbs_data[REG_SERIAL_NUMBER].addr);
 	if (ret < 0)
 		return ret;
 
-	ret = sprintf(bq20z75_serial, "%04x", ret);
-	val->strval = bq20z75_serial;
+	ret = sprintf(sbs_serial, "%04x", ret);
+	val->strval = sbs_serial;
 
 	return 0;
 }
 
-static int bq20z75_get_property_index(struct i2c_client *client,
+static int sbs_get_property_index(struct i2c_client *client,
 	enum power_supply_property psp)
 {
 	int count;
-	for (count = 0; count < ARRAY_SIZE(bq20z75_data); count++)
-		if (psp == bq20z75_data[count].psp)
+	for (count = 0; count < ARRAY_SIZE(sbs_data); count++)
+		if (psp == sbs_data[count].psp)
 			return count;
 
 	dev_warn(&client->dev,
@@ -462,19 +450,19 @@ static int bq20z75_get_property_index(struct i2c_client *client,
 	return -EINVAL;
 }
 
-static int bq20z75_get_property(struct power_supply *psy,
+static int sbs_get_property(struct power_supply *psy,
 	enum power_supply_property psp,
 	union power_supply_propval *val)
 {
 	int ret = 0;
-	struct bq20z75_info *bq20z75_device = container_of(psy,
-				struct bq20z75_info, power_supply);
-	struct i2c_client *client = bq20z75_device->client;
+	struct sbs_info *chip = container_of(psy,
+				struct sbs_info, power_supply);
+	struct i2c_client *client = chip->client;
 
 	switch (psp) {
 	case POWER_SUPPLY_PROP_PRESENT:
 	case POWER_SUPPLY_PROP_HEALTH:
-		ret = bq20z75_get_battery_presence_and_health(client, psp, val);
+		ret = sbs_get_battery_presence_and_health(client, psp, val);
 		if (psp == POWER_SUPPLY_PROP_PRESENT)
 			return 0;
 		break;
@@ -490,15 +478,15 @@ static int bq20z75_get_property(struct power_supply *psy,
 	case POWER_SUPPLY_PROP_CHARGE_FULL:
 	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
 	case POWER_SUPPLY_PROP_CAPACITY:
-		ret = bq20z75_get_property_index(client, psp);
+		ret = sbs_get_property_index(client, psp);
 		if (ret < 0)
 			break;
 
-		ret = bq20z75_get_battery_capacity(client, ret, psp, val);
+		ret = sbs_get_battery_capacity(client, ret, psp, val);
 		break;
 
 	case POWER_SUPPLY_PROP_SERIAL_NUMBER:
-		ret = bq20z75_get_battery_serial_number(client, val);
+		ret = sbs_get_battery_serial_number(client, val);
 		break;
 
 	case POWER_SUPPLY_PROP_STATUS:
@@ -509,11 +497,11 @@ static int bq20z75_get_property(struct power_supply *psy,
 	case POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG:
 	case POWER_SUPPLY_PROP_TIME_TO_FULL_AVG:
 	case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN:
-		ret = bq20z75_get_property_index(client, psp);
+		ret = sbs_get_property_index(client, psp);
 		if (ret < 0)
 			break;
 
-		ret = bq20z75_get_battery_property(client, ret, psp, val);
+		ret = sbs_get_battery_property(client, ret, psp, val);
 		break;
 
 	default:
@@ -522,25 +510,25 @@ static int bq20z75_get_property(struct power_supply *psy,
 		return -EINVAL;
 	}
 
-	if (!bq20z75_device->enable_detection)
+	if (!chip->enable_detection)
 		goto done;
 
-	if (!bq20z75_device->gpio_detect &&
-		bq20z75_device->is_present != (ret >= 0)) {
-		bq20z75_device->is_present = (ret >= 0);
-		power_supply_changed(&bq20z75_device->power_supply);
+	if (!chip->gpio_detect &&
+		chip->is_present != (ret >= 0)) {
+		chip->is_present = (ret >= 0);
+		power_supply_changed(&chip->power_supply);
 	}
 
 done:
 	if (!ret) {
 		/* Convert units to match requirements for power supply class */
-		bq20z75_unit_adjustment(client, psp, val);
+		sbs_unit_adjustment(client, psp, val);
 	}
 
 	dev_dbg(&client->dev,
 		"%s: property = %d, value = %x\n", __func__, psp, val->intval);
 
-	if (ret && bq20z75_device->is_present)
+	if (ret && chip->is_present)
 		return ret;
 
 	/* battery not present, so return NODATA for properties */
@@ -550,7 +538,7 @@ done:
 	return 0;
 }
 
-static irqreturn_t bq20z75_irq(int irq, void *devid)
+static irqreturn_t sbs_irq(int irq, void *devid)
 {
 	struct power_supply *battery = devid;
 
@@ -559,36 +547,35 @@ static irqreturn_t bq20z75_irq(int irq, void *devid)
 	return IRQ_HANDLED;
 }
 
-static void bq20z75_external_power_changed(struct power_supply *psy)
+static void sbs_external_power_changed(struct power_supply *psy)
 {
-	struct bq20z75_info *bq20z75_device;
+	struct sbs_info *chip;
 
-	bq20z75_device = container_of(psy, struct bq20z75_info, power_supply);
+	chip = container_of(psy, struct sbs_info, power_supply);
 
-	if (bq20z75_device->ignore_changes > 0) {
-		bq20z75_device->ignore_changes--;
+	if (chip->ignore_changes > 0) {
+		chip->ignore_changes--;
 		return;
 	}
 
 	/* cancel outstanding work */
-	cancel_delayed_work_sync(&bq20z75_device->work);
+	cancel_delayed_work_sync(&chip->work);
 
-	schedule_delayed_work(&bq20z75_device->work, HZ);
-	bq20z75_device->poll_time = bq20z75_device->pdata->poll_retry_count;
+	schedule_delayed_work(&chip->work, HZ);
+	chip->poll_time = chip->pdata->poll_retry_count;
 }
 
-static void bq20z75_delayed_work(struct work_struct *work)
+static void sbs_delayed_work(struct work_struct *work)
 {
-	struct bq20z75_info *bq20z75_device;
+	struct sbs_info *chip;
 	s32 ret;
 
-	bq20z75_device = container_of(work, struct bq20z75_info, work.work);
+	chip = container_of(work, struct sbs_info, work.work);
 
-	ret = bq20z75_read_word_data(bq20z75_device->client,
-				     bq20z75_data[REG_STATUS].addr);
+	ret = sbs_read_word_data(chip->client, sbs_data[REG_STATUS].addr);
 	/* if the read failed, give up on this work */
 	if (ret < 0) {
-		bq20z75_device->poll_time = 0;
+		chip->poll_time = 0;
 		return;
 	}
 
@@ -601,14 +588,14 @@ static void bq20z75_delayed_work(struct work_struct *work)
 	else
 		ret = POWER_SUPPLY_STATUS_CHARGING;
 
-	if (bq20z75_device->last_state != ret) {
-		bq20z75_device->poll_time = 0;
-		power_supply_changed(&bq20z75_device->power_supply);
+	if (chip->last_state != ret) {
+		chip->poll_time = 0;
+		power_supply_changed(&chip->power_supply);
 		return;
 	}
-	if (bq20z75_device->poll_time > 0) {
-		schedule_delayed_work(&bq20z75_device->work, HZ);
-		bq20z75_device->poll_time--;
+	if (chip->poll_time > 0) {
+		schedule_delayed_work(&chip->work, HZ);
+		chip->poll_time--;
 		return;
 	}
 }
@@ -618,17 +605,18 @@ static void bq20z75_delayed_work(struct work_struct *work)
 #include <linux/of_device.h>
 #include <linux/of_gpio.h>
 
-static const struct of_device_id bq20z75_dt_ids[] = {
+static const struct of_device_id sbs_dt_ids[] = {
+	{ .compatible = "sbs,sbs-battery" },
 	{ .compatible = "ti,bq20z75" },
 	{ }
 };
-MODULE_DEVICE_TABLE(i2c, bq20z75_dt_ids);
+MODULE_DEVICE_TABLE(i2c, sbs_dt_ids);
 
-static struct bq20z75_platform_data *bq20z75_of_populate_pdata(
+static struct sbs_platform_data *sbs_of_populate_pdata(
 		struct i2c_client *client)
 {
 	struct device_node *of_node = client->dev.of_node;
-	struct bq20z75_platform_data *pdata = client->dev.platform_data;
+	struct sbs_platform_data *pdata = client->dev.platform_data;
 	enum of_gpio_flags gpio_flags;
 	int rc;
 	u32 prop;
@@ -644,31 +632,31 @@ static struct bq20z75_platform_data *bq20z75_of_populate_pdata(
 	/* first make sure at least one property is set, otherwise
 	 * it won't change behavior from running without pdata.
 	 */
-	if (!of_get_property(of_node, "ti,i2c-retry-count", NULL) &&
-		!of_get_property(of_node, "ti,poll-retry-count", NULL) &&
-		!of_get_property(of_node, "ti,battery-detect-gpios", NULL))
+	if (!of_get_property(of_node, "sbs,i2c-retry-count", NULL) &&
+		!of_get_property(of_node, "sbs,poll-retry-count", NULL) &&
+		!of_get_property(of_node, "sbs,battery-detect-gpios", NULL))
 		goto of_out;
 
-	pdata = devm_kzalloc(&client->dev, sizeof(struct bq20z75_platform_data),
+	pdata = devm_kzalloc(&client->dev, sizeof(struct sbs_platform_data),
 				GFP_KERNEL);
 	if (!pdata)
 		goto of_out;
 
-	rc = of_property_read_u32(of_node, "ti,i2c-retry-count", &prop);
+	rc = of_property_read_u32(of_node, "sbs,i2c-retry-count", &prop);
 	if (!rc)
 		pdata->i2c_retry_count = prop;
 
-	rc = of_property_read_u32(of_node, "ti,poll-retry-count", &prop);
+	rc = of_property_read_u32(of_node, "sbs,poll-retry-count", &prop);
 	if (!rc)
 		pdata->poll_retry_count = prop;
 
-	if (!of_get_property(of_node, "ti,battery-detect-gpios", NULL)) {
+	if (!of_get_property(of_node, "sbs,battery-detect-gpios", NULL)) {
 		pdata->battery_detect = -1;
 		goto of_out;
 	}
 
 	pdata->battery_detect = of_get_named_gpio_flags(of_node,
-			"ti,battery-detect-gpios", 0, &gpio_flags);
+			"sbs,battery-detect-gpios", 0, &gpio_flags);
 
 	if (gpio_flags & OF_GPIO_ACTIVE_LOW)
 		pdata->battery_detect_present = 0;
@@ -679,60 +667,57 @@ of_out:
 	return pdata;
 }
 #else
-#define bq20z75_dt_ids NULL
-static struct bq20z75_platform_data *bq20z75_of_populate_pdata(
+#define sbs_dt_ids NULL
+static struct sbs_platform_data *sbs_of_populate_pdata(
 	struct i2c_client *client)
 {
 	return client->dev.platform_data;
 }
 #endif
 
-static int __devinit bq20z75_probe(struct i2c_client *client,
+static int __devinit sbs_probe(struct i2c_client *client,
 	const struct i2c_device_id *id)
 {
-	struct bq20z75_info *bq20z75_device;
-	struct bq20z75_platform_data *pdata = client->dev.platform_data;
+	struct sbs_info *chip;
+	struct sbs_platform_data *pdata = client->dev.platform_data;
 	int rc;
 	int irq;
 
-	bq20z75_device = kzalloc(sizeof(struct bq20z75_info), GFP_KERNEL);
-	if (!bq20z75_device)
+	chip = kzalloc(sizeof(struct sbs_info), GFP_KERNEL);
+	if (!chip)
 		return -ENOMEM;
 
-	bq20z75_device->client = client;
-	bq20z75_device->enable_detection = false;
-	bq20z75_device->gpio_detect = false;
-	bq20z75_device->power_supply.name = "battery";
-	bq20z75_device->power_supply.type = POWER_SUPPLY_TYPE_BATTERY;
-	bq20z75_device->power_supply.properties = bq20z75_properties;
-	bq20z75_device->power_supply.num_properties =
-		ARRAY_SIZE(bq20z75_properties);
-	bq20z75_device->power_supply.get_property = bq20z75_get_property;
+	chip->client = client;
+	chip->enable_detection = false;
+	chip->gpio_detect = false;
+	chip->power_supply.name = "battery";
+	chip->power_supply.type = POWER_SUPPLY_TYPE_BATTERY;
+	chip->power_supply.properties = sbs_properties;
+	chip->power_supply.num_properties = ARRAY_SIZE(sbs_properties);
+	chip->power_supply.get_property = sbs_get_property;
 	/* ignore first notification of external change, it is generated
 	 * from the power_supply_register call back
 	 */
-	bq20z75_device->ignore_changes = 1;
-	bq20z75_device->last_state = POWER_SUPPLY_STATUS_UNKNOWN;
-	bq20z75_device->power_supply.external_power_changed =
-		bq20z75_external_power_changed;
+	chip->ignore_changes = 1;
+	chip->last_state = POWER_SUPPLY_STATUS_UNKNOWN;
+	chip->power_supply.external_power_changed = sbs_external_power_changed;
 
-	pdata = bq20z75_of_populate_pdata(client);
+	pdata = sbs_of_populate_pdata(client);
 
 	if (pdata) {
-		bq20z75_device->gpio_detect =
-			gpio_is_valid(pdata->battery_detect);
-		bq20z75_device->pdata = pdata;
+		chip->gpio_detect = gpio_is_valid(pdata->battery_detect);
+		chip->pdata = pdata;
 	}
 
-	i2c_set_clientdata(client, bq20z75_device);
+	i2c_set_clientdata(client, chip);
 
-	if (!bq20z75_device->gpio_detect)
+	if (!chip->gpio_detect)
 		goto skip_gpio;
 
 	rc = gpio_request(pdata->battery_detect, dev_name(&client->dev));
 	if (rc) {
 		dev_warn(&client->dev, "Failed to request gpio: %d\n", rc);
-		bq20z75_device->gpio_detect = false;
+		chip->gpio_detect = false;
 		goto skip_gpio;
 	}
 
@@ -740,7 +725,7 @@ static int __devinit bq20z75_probe(struct i2c_client *client,
 	if (rc) {
 		dev_warn(&client->dev, "Failed to get gpio as input: %d\n", rc);
 		gpio_free(pdata->battery_detect);
-		bq20z75_device->gpio_detect = false;
+		chip->gpio_detect = false;
 		goto skip_gpio;
 	}
 
@@ -748,25 +733,25 @@ static int __devinit bq20z75_probe(struct i2c_client *client,
 	if (irq <= 0) {
 		dev_warn(&client->dev, "Failed to get gpio as irq: %d\n", irq);
 		gpio_free(pdata->battery_detect);
-		bq20z75_device->gpio_detect = false;
+		chip->gpio_detect = false;
 		goto skip_gpio;
 	}
 
-	rc = request_irq(irq, bq20z75_irq,
+	rc = request_irq(irq, sbs_irq,
 		IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-		dev_name(&client->dev), &bq20z75_device->power_supply);
+		dev_name(&client->dev), &chip->power_supply);
 	if (rc) {
 		dev_warn(&client->dev, "Failed to request irq: %d\n", rc);
 		gpio_free(pdata->battery_detect);
-		bq20z75_device->gpio_detect = false;
+		chip->gpio_detect = false;
 		goto skip_gpio;
 	}
 
-	bq20z75_device->irq = irq;
+	chip->irq = irq;
 
 skip_gpio:
 
-	rc = power_supply_register(&client->dev, &bq20z75_device->power_supply);
+	rc = power_supply_register(&client->dev, &chip->power_supply);
 	if (rc) {
 		dev_err(&client->dev,
 			"%s: Failed to register power supply\n", __func__);
@@ -776,96 +761,96 @@ skip_gpio:
 	dev_info(&client->dev,
 		"%s: battery gas gauge device registered\n", client->name);
 
-	INIT_DELAYED_WORK(&bq20z75_device->work, bq20z75_delayed_work);
+	INIT_DELAYED_WORK(&chip->work, sbs_delayed_work);
 
-	bq20z75_device->enable_detection = true;
+	chip->enable_detection = true;
 
 	return 0;
 
 exit_psupply:
-	if (bq20z75_device->irq)
-		free_irq(bq20z75_device->irq, &bq20z75_device->power_supply);
-	if (bq20z75_device->gpio_detect)
+	if (chip->irq)
+		free_irq(chip->irq, &chip->power_supply);
+	if (chip->gpio_detect)
 		gpio_free(pdata->battery_detect);
 
-	kfree(bq20z75_device);
+	kfree(chip);
 
 	return rc;
 }
 
-static int __devexit bq20z75_remove(struct i2c_client *client)
+static int __devexit sbs_remove(struct i2c_client *client)
 {
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	struct sbs_info *chip = i2c_get_clientdata(client);
 
-	if (bq20z75_device->irq)
-		free_irq(bq20z75_device->irq, &bq20z75_device->power_supply);
-	if (bq20z75_device->gpio_detect)
-		gpio_free(bq20z75_device->pdata->battery_detect);
+	if (chip->irq)
+		free_irq(chip->irq, &chip->power_supply);
+	if (chip->gpio_detect)
+		gpio_free(chip->pdata->battery_detect);
 
-	power_supply_unregister(&bq20z75_device->power_supply);
+	power_supply_unregister(&chip->power_supply);
 
-	cancel_delayed_work_sync(&bq20z75_device->work);
+	cancel_delayed_work_sync(&chip->work);
 
-	kfree(bq20z75_device);
-	bq20z75_device = NULL;
+	kfree(chip);
+	chip = NULL;
 
 	return 0;
 }
 
 #if defined CONFIG_PM
-static int bq20z75_suspend(struct i2c_client *client,
+static int sbs_suspend(struct i2c_client *client,
 	pm_message_t state)
 {
-	struct bq20z75_info *bq20z75_device = i2c_get_clientdata(client);
+	struct sbs_info *chip = i2c_get_clientdata(client);
 	s32 ret;
 
-	if (bq20z75_device->poll_time > 0)
-		cancel_delayed_work_sync(&bq20z75_device->work);
+	if (chip->poll_time > 0)
+		cancel_delayed_work_sync(&chip->work);
 
 	/* write to manufacturer access with sleep command */
-	ret = bq20z75_write_word_data(client,
-		bq20z75_data[REG_MANUFACTURER_DATA].addr,
+	ret = sbs_write_word_data(client, sbs_data[REG_MANUFACTURER_DATA].addr,
 		MANUFACTURER_ACCESS_SLEEP);
-	if (bq20z75_device->is_present && ret < 0)
+	if (chip->is_present && ret < 0)
 		return ret;
 
 	return 0;
 }
 #else
-#define bq20z75_suspend		NULL
+#define sbs_suspend		NULL
 #endif
-/* any smbus transaction will wake up bq20z75 */
-#define bq20z75_resume		NULL
+/* any smbus transaction will wake up sbs */
+#define sbs_resume		NULL
 
-static const struct i2c_device_id bq20z75_id[] = {
+static const struct i2c_device_id sbs_id[] = {
 	{ "bq20z75", 0 },
+	{ "sbs-battery", 1 },
 	{}
 };
-MODULE_DEVICE_TABLE(i2c, bq20z75_id);
-
-static struct i2c_driver bq20z75_battery_driver = {
-	.probe		= bq20z75_probe,
-	.remove		= __devexit_p(bq20z75_remove),
-	.suspend	= bq20z75_suspend,
-	.resume		= bq20z75_resume,
-	.id_table	= bq20z75_id,
+MODULE_DEVICE_TABLE(i2c, sbs_id);
+
+static struct i2c_driver sbs_battery_driver = {
+	.probe		= sbs_probe,
+	.remove		= __devexit_p(sbs_remove),
+	.suspend	= sbs_suspend,
+	.resume		= sbs_resume,
+	.id_table	= sbs_id,
 	.driver = {
-		.name	= "bq20z75-battery",
-		.of_match_table = bq20z75_dt_ids,
+		.name	= "sbs-battery",
+		.of_match_table = sbs_dt_ids,
 	},
 };
 
-static int __init bq20z75_battery_init(void)
+static int __init sbs_battery_init(void)
 {
-	return i2c_add_driver(&bq20z75_battery_driver);
+	return i2c_add_driver(&sbs_battery_driver);
 }
-module_init(bq20z75_battery_init);
+module_init(sbs_battery_init);
 
-static void __exit bq20z75_battery_exit(void)
+static void __exit sbs_battery_exit(void)
 {
-	i2c_del_driver(&bq20z75_battery_driver);
+	i2c_del_driver(&sbs_battery_driver);
 }
-module_exit(bq20z75_battery_exit);
+module_exit(sbs_battery_exit);
 
-MODULE_DESCRIPTION("BQ20z75 battery monitor driver");
+MODULE_DESCRIPTION("SBS battery monitor driver");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/power/sbs-battery.h b/include/linux/power/sbs-battery.h
index 1398eb004e83..2b0a9d9ff57e 100644
--- a/include/linux/power/sbs-battery.h
+++ b/include/linux/power/sbs-battery.h
@@ -1,5 +1,5 @@
 /*
- * Gas Gauge driver for TI's BQ20Z75
+ * Gas Gauge driver for SBS Compliant Gas Gauges
  *
  * Copyright (c) 2010, NVIDIA Corporation.
  *
@@ -18,21 +18,21 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 
-#ifndef __LINUX_POWER_BQ20Z75_H_
-#define __LINUX_POWER_BQ20Z75_H_
+#ifndef __LINUX_POWER_SBS_BATTERY_H_
+#define __LINUX_POWER_SBS_BATTERY_H_
 
 #include <linux/power_supply.h>
 #include <linux/types.h>
 
 /**
- * struct bq20z75_platform_data - platform data for bq20z75 devices
+ * struct sbs_platform_data - platform data for sbs devices
  * @battery_detect:		GPIO which is used to detect battery presence
  * @battery_detect_present:	gpio state when battery is present (0 / 1)
  * @i2c_retry_count:		# of times to retry on i2c IO failure
  * @poll_retry_count:		# of times to retry looking for new status after
  *				external change notification
  */
-struct bq20z75_platform_data {
+struct sbs_platform_data {
 	int battery_detect;
 	int battery_detect_present;
 	int i2c_retry_count;
-- 
cgit v1.2.3


From 34aed73df3a9e75e313a7510b201f6755ae3e6bc Mon Sep 17 00:00:00 2001
From: Heiko Stübner <heiko@sntech.de>
Date: Thu, 29 Dec 2011 12:52:07 +0100
Subject: s3c_adc_battery: Average over more than one adc sample

Some sources for adc battery information provide only inaccurate results
where the read value differs from the real value with positive and negative
offsets. For such sources it can be more accurate to collect two or more
value samples and use the average of all collected values.

This patch adds pdata options volt_samples, current_samples and
backup_volt_samples to specify the number of samples to collect,
reads the specified number of samples and calculates the average of those.
For unset sample-number-values a default of 1 is assumed.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/s3c_adc_battery.c | 25 ++++++++++++++++++++++---
 include/linux/s3c_adc_battery.h |  4 ++++
 2 files changed, 26 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/s3c_adc_battery.c b/drivers/power/s3c_adc_battery.c
index e687ee7f18f2..8b804a566756 100644
--- a/drivers/power/s3c_adc_battery.c
+++ b/drivers/power/s3c_adc_battery.c
@@ -47,6 +47,22 @@ static void s3c_adc_bat_ext_power_changed(struct power_supply *psy)
 		msecs_to_jiffies(JITTER_DELAY));
 }
 
+static int gather_samples(struct s3c_adc_client *client, int num, int channel)
+{
+	int value, i;
+
+	/* default to 1 if nothing is set */
+	if (num < 1)
+		num = 1;
+
+	value = 0;
+	for (i = 0; i < num; i++)
+		value += s3c_adc_read(client, channel);
+	value /= num;
+
+	return value;
+}
+
 static enum power_supply_property s3c_adc_backup_bat_props[] = {
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
 	POWER_SUPPLY_PROP_VOLTAGE_MIN,
@@ -67,7 +83,8 @@ static int s3c_adc_backup_bat_get_property(struct power_supply *psy,
 	if (bat->volt_value < 0 ||
 		jiffies_to_msecs(jiffies - bat->timestamp) >
 			BAT_POLL_INTERVAL) {
-		bat->volt_value = s3c_adc_read(bat->client,
+		bat->volt_value = gather_samples(bat->client,
+			bat->pdata->backup_volt_samples,
 			bat->pdata->backup_volt_channel);
 		bat->volt_value *= bat->pdata->backup_volt_mult;
 		bat->timestamp = jiffies;
@@ -139,9 +156,11 @@ static int s3c_adc_bat_get_property(struct power_supply *psy,
 	if (bat->volt_value < 0 || bat->cur_value < 0 ||
 		jiffies_to_msecs(jiffies - bat->timestamp) >
 			BAT_POLL_INTERVAL) {
-		bat->volt_value = s3c_adc_read(bat->client,
+		bat->volt_value = gather_samples(bat->client,
+			bat->pdata->volt_samples,
 			bat->pdata->volt_channel) * bat->pdata->volt_mult;
-		bat->cur_value = s3c_adc_read(bat->client,
+		bat->cur_value = gather_samples(bat->client,
+			bat->pdata->current_samples,
 			bat->pdata->current_channel) * bat->pdata->current_mult;
 		bat->timestamp = jiffies;
 	}
diff --git a/include/linux/s3c_adc_battery.h b/include/linux/s3c_adc_battery.h
index fbe58b7e63eb..99dadbffdd4f 100644
--- a/include/linux/s3c_adc_battery.h
+++ b/include/linux/s3c_adc_battery.h
@@ -25,6 +25,10 @@ struct s3c_adc_bat_pdata {
 	const unsigned int current_channel;
 	const unsigned int backup_volt_channel;
 
+	const unsigned int volt_samples;
+	const unsigned int current_samples;
+	const unsigned int backup_volt_samples;
+
 	const unsigned int volt_mult;
 	const unsigned int current_mult;
 	const unsigned int backup_volt_mult;
-- 
cgit v1.2.3


From c11b46c32c8a9bf05fdb76d70d8dc74fcbfd02d1 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 4 Jan 2012 15:34:17 +0100
Subject: dma: shdma: fix runtime PM: clear channel buffers on reset

On platforms supporting power domains, if the domain containing a DMAC
instance is powered down, the driver fails to resume correctly. On those
platforms DMAC channels have an additional CHCLR register for clearing
channel buffers. Using this register during runtime resume fixes the
problem.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/shdma.c    | 47 +++++++++++++++++++++++++++++++----------------
 include/linux/sh_dma.h |  2 ++
 2 files changed, 33 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 592304fb41a6..54043cd831c8 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -56,6 +56,15 @@ static LIST_HEAD(sh_dmae_devices);
 static unsigned long sh_dmae_slave_used[BITS_TO_LONGS(SH_DMA_SLAVE_NUMBER)];
 
 static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all);
+static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan);
+
+static void chclr_write(struct sh_dmae_chan *sh_dc, u32 data)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+	__raw_writel(data, shdev->chan_reg +
+		     shdev->pdata->channel[sh_dc->id].chclr_offset);
+}
 
 static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
 {
@@ -128,6 +137,15 @@ static int sh_dmae_rst(struct sh_dmae_device *shdev)
 
 	dmaor = dmaor_read(shdev) & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME);
 
+	if (shdev->pdata->chclr_present) {
+		int i;
+		for (i = 0; i < shdev->pdata->channel_num; i++) {
+			struct sh_dmae_chan *sh_chan = shdev->chan[i];
+			if (sh_chan)
+				chclr_write(sh_chan, 0);
+		}
+	}
+
 	dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init);
 
 	dmaor = dmaor_read(shdev);
@@ -138,6 +156,10 @@ static int sh_dmae_rst(struct sh_dmae_device *shdev)
 		dev_warn(shdev->common.dev, "Can't initialize DMAOR.\n");
 		return -EIO;
 	}
+	if (shdev->pdata->dmaor_init & ~dmaor)
+		dev_warn(shdev->common.dev,
+			 "DMAOR=0x%x hasn't latched the initial value 0x%x.\n",
+			 dmaor, shdev->pdata->dmaor_init);
 	return 0;
 }
 
@@ -258,8 +280,6 @@ static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
 	return 0;
 }
 
-static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan);
-
 static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c;
@@ -339,6 +359,8 @@ static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
 				sh_chan_xfer_ld_queue(sh_chan);
 			sh_chan->pm_state = DMAE_PM_ESTABLISHED;
 		}
+	} else {
+		sh_chan->pm_state = DMAE_PM_PENDING;
 	}
 
 	spin_unlock_irq(&sh_chan->desc_lock);
@@ -1224,6 +1246,8 @@ static int __init sh_dmae_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, shdev);
 
+	shdev->common.dev = &pdev->dev;
+
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_get_sync(&pdev->dev);
 
@@ -1253,7 +1277,6 @@ static int __init sh_dmae_probe(struct platform_device *pdev)
 	shdev->common.device_prep_slave_sg = sh_dmae_prep_slave_sg;
 	shdev->common.device_control = sh_dmae_control;
 
-	shdev->common.dev = &pdev->dev;
 	/* Default transfer size of 32 bytes requires 32-byte alignment */
 	shdev->common.copy_align = LOG2_DEFAULT_XFER_SIZE;
 
@@ -1434,22 +1457,17 @@ static int sh_dmae_runtime_resume(struct device *dev)
 #ifdef CONFIG_PM
 static int sh_dmae_suspend(struct device *dev)
 {
-	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
-	int i;
-
-	for (i = 0; i < shdev->pdata->channel_num; i++) {
-		struct sh_dmae_chan *sh_chan = shdev->chan[i];
-		if (sh_chan->descs_allocated)
-			sh_chan->pm_error = pm_runtime_put_sync(dev);
-	}
-
 	return 0;
 }
 
 static int sh_dmae_resume(struct device *dev)
 {
 	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
-	int i;
+	int i, ret;
+
+	ret = sh_dmae_rst(shdev);
+	if (ret < 0)
+		dev_err(dev, "Failed to reset!\n");
 
 	for (i = 0; i < shdev->pdata->channel_num; i++) {
 		struct sh_dmae_chan *sh_chan = shdev->chan[i];
@@ -1458,9 +1476,6 @@ static int sh_dmae_resume(struct device *dev)
 		if (!sh_chan->descs_allocated)
 			continue;
 
-		if (!sh_chan->pm_error)
-			pm_runtime_get_sync(dev);
-
 		if (param) {
 			const struct sh_dmae_slave_config *cfg = param->config;
 			dmae_set_dmars(sh_chan, cfg->mid_rid);
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
index 62ef6938da10..8cd7fe59cf1a 100644
--- a/include/linux/sh_dma.h
+++ b/include/linux/sh_dma.h
@@ -48,6 +48,7 @@ struct sh_dmae_channel {
 	unsigned int	offset;
 	unsigned int	dmars;
 	unsigned int	dmars_bit;
+	unsigned int	chclr_offset;
 };
 
 struct sh_dmae_pdata {
@@ -68,6 +69,7 @@ struct sh_dmae_pdata {
 	unsigned int dmaor_is_32bit:1;
 	unsigned int needs_tend_set:1;
 	unsigned int no_dmars:1;
+	unsigned int chclr_present:1;
 };
 
 /* DMA register */
-- 
cgit v1.2.3


From fe3449a4aa4e62404cc1c57c945fd56152b2f877 Mon Sep 17 00:00:00 2001
From: Theodore Kilgore <kilgota@banach.math.auburn.edu>
Date: Tue, 13 Dec 2011 18:09:15 -0300
Subject: [media] gspca: Add jl2005bcd sub driver

Written by Theodore Kilgore

With minor changes by Hans de Goede:
-Code style fixes
-Correct the verbose level on various PDEBUG messages
-Make error messages use pr_err instead of PDEBUG
-Document the jl20 pixel format

Signed-off-by: Theodore Kilgore <kilgota@auburn.edu>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/v4l/pixfmt.xml |   5 +
 Documentation/video4linux/gspca.txt        |   1 +
 drivers/media/video/gspca/Kconfig          |  10 +
 drivers/media/video/gspca/Makefile         |   2 +
 drivers/media/video/gspca/jl2005bcd.c      | 554 +++++++++++++++++++++++++++++
 include/linux/videodev2.h                  |   1 +
 6 files changed, 573 insertions(+)
 create mode 100644 drivers/media/video/gspca/jl2005bcd.c

(limited to 'include/linux')

diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml
index a33a4b22173b..9ddc57cb2ef9 100644
--- a/Documentation/DocBook/media/v4l/pixfmt.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt.xml
@@ -890,6 +890,11 @@ kernel sources in the file <filename>Documentation/video4linux/cx2341x/README.hm
 	    <entry>'M310'</entry>
 	    <entry>Compressed BGGR Bayer format used by the gspca driver.</entry>
 	  </row>
+	  <row id="V4L2-PIX-FMT-JL2005BCD">
+	    <entry><constant>V4L2_PIX_FMT_JL2005BCD</constant></entry>
+	    <entry>'JL20'</entry>
+	    <entry>JPEG compressed RGGB Bayer format used by the gspca driver.</entry>
+	  </row>
 	  <row id="V4L2-PIX-FMT-OV511">
 	    <entry><constant>V4L2_PIX_FMT_OV511</constant></entry>
 	    <entry>'O511'</entry>
diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt
index 393815b62810..f2060f0dc02c 100644
--- a/Documentation/video4linux/gspca.txt
+++ b/Documentation/video4linux/gspca.txt
@@ -279,6 +279,7 @@ pac7302		093a:2628	Genius iLook 300
 pac7302		093a:2629	Genious iSlim 300
 pac7302		093a:262a	Webcam 300k
 pac7302		093a:262c	Philips SPC 230 NC
+jl2005bcd	0979:0227	Various brands, 19 known cameras supported
 jeilinj		0979:0280	Sakar 57379
 jeilinj		0979:0280	Sportscam DV15
 zc3xx		0ac8:0302	Z-star Vimicro zc0302
diff --git a/drivers/media/video/gspca/Kconfig b/drivers/media/video/gspca/Kconfig
index 103af3fe5aa0..dfe268bfa4f8 100644
--- a/drivers/media/video/gspca/Kconfig
+++ b/drivers/media/video/gspca/Kconfig
@@ -77,6 +77,16 @@ config USB_GSPCA_JEILINJ
 	  To compile this driver as a module, choose M here: the
 	  module will be called gspca_jeilinj.
 
+config USB_GSPCA_JL2005BCD
+	tristate "JL2005B/C/D USB V4L2 driver"
+	depends on VIDEO_V4L2 && USB_GSPCA
+	help
+	  Say Y here if you want support for cameras based on the
+	  JL2005B, JL2005C, or JL2005D chip.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called gspca_jl2005bcd.
+
 config USB_GSPCA_KINECT
 	tristate "Kinect sensor device USB Camera Driver"
 	depends on VIDEO_V4L2 && USB_GSPCA
diff --git a/drivers/media/video/gspca/Makefile b/drivers/media/video/gspca/Makefile
index f345f494d0f3..79ebe46e1ad7 100644
--- a/drivers/media/video/gspca/Makefile
+++ b/drivers/media/video/gspca/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_USB_GSPCA_CPIA1)    += gspca_cpia1.o
 obj-$(CONFIG_USB_GSPCA_ETOMS)    += gspca_etoms.o
 obj-$(CONFIG_USB_GSPCA_FINEPIX)  += gspca_finepix.o
 obj-$(CONFIG_USB_GSPCA_JEILINJ)  += gspca_jeilinj.o
+obj-$(CONFIG_USB_GSPCA_JL2005BCD) += gspca_jl2005bcd.o
 obj-$(CONFIG_USB_GSPCA_KINECT)   += gspca_kinect.o
 obj-$(CONFIG_USB_GSPCA_KONICA)   += gspca_konica.o
 obj-$(CONFIG_USB_GSPCA_MARS)     += gspca_mars.o
@@ -49,6 +50,7 @@ gspca_cpia1-objs    := cpia1.o
 gspca_etoms-objs    := etoms.o
 gspca_finepix-objs  := finepix.o
 gspca_jeilinj-objs  := jeilinj.o
+gspca_jl2005bcd-objs  := jl2005bcd.o
 gspca_kinect-objs   := kinect.o
 gspca_konica-objs   := konica.o
 gspca_mars-objs     := mars.o
diff --git a/drivers/media/video/gspca/jl2005bcd.c b/drivers/media/video/gspca/jl2005bcd.c
new file mode 100644
index 000000000000..53f58ef367cf
--- /dev/null
+++ b/drivers/media/video/gspca/jl2005bcd.c
@@ -0,0 +1,554 @@
+/*
+ * Jeilin JL2005B/C/D library
+ *
+ * Copyright (C) 2011 Theodore Kilgore <kilgota@auburn.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#define MODULE_NAME "jl2005bcd"
+
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include "gspca.h"
+
+
+MODULE_AUTHOR("Theodore Kilgore <kilgota@auburn.edu>");
+MODULE_DESCRIPTION("JL2005B/C/D USB Camera Driver");
+MODULE_LICENSE("GPL");
+
+/* Default timeouts, in ms */
+#define JL2005C_CMD_TIMEOUT 500
+#define JL2005C_DATA_TIMEOUT 1000
+
+/* Maximum transfer size to use. */
+#define JL2005C_MAX_TRANSFER 0x200
+#define FRAME_HEADER_LEN 16
+
+
+/* specific webcam descriptor */
+struct sd {
+	struct gspca_dev gspca_dev;  /* !! must be the first item */
+	unsigned char firmware_id[6];
+	const struct v4l2_pix_format *cap_mode;
+	/* Driver stuff */
+	struct work_struct work_struct;
+	struct workqueue_struct *work_thread;
+	u8 frame_brightness;
+	int block_size;	/* block size of camera */
+	int vga;	/* 1 if vga cam, 0 if cif cam */
+};
+
+
+/* Camera has two resolution settings. What they are depends on model. */
+static const struct v4l2_pix_format cif_mode[] = {
+	{176, 144, V4L2_PIX_FMT_JL2005BCD, V4L2_FIELD_NONE,
+		.bytesperline = 176,
+		.sizeimage = 176 * 144,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.priv = 0},
+	{352, 288, V4L2_PIX_FMT_JL2005BCD, V4L2_FIELD_NONE,
+		.bytesperline = 352,
+		.sizeimage = 352 * 288,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.priv = 0},
+};
+
+static const struct v4l2_pix_format vga_mode[] = {
+	{320, 240, V4L2_PIX_FMT_JL2005BCD, V4L2_FIELD_NONE,
+		.bytesperline = 320,
+		.sizeimage = 320 * 240,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.priv = 0},
+	{640, 480, V4L2_PIX_FMT_JL2005BCD, V4L2_FIELD_NONE,
+		.bytesperline = 640,
+		.sizeimage = 640 * 480,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.priv = 0},
+};
+
+/*
+ * cam uses endpoint 0x03 to send commands, 0x84 for read commands,
+ * and 0x82 for bulk data transfer.
+ */
+
+/* All commands are two bytes only */
+static int jl2005c_write2(struct gspca_dev *gspca_dev, unsigned char *command)
+{
+	int retval;
+
+	memcpy(gspca_dev->usb_buf, command, 2);
+	retval = usb_bulk_msg(gspca_dev->dev,
+			usb_sndbulkpipe(gspca_dev->dev, 3),
+			gspca_dev->usb_buf, 2, NULL, 500);
+	if (retval < 0)
+		pr_err("command write [%02x] error %d\n",
+		       gspca_dev->usb_buf[0], retval);
+	return retval;
+}
+
+/* Response to a command is one byte in usb_buf[0], only if requested. */
+static int jl2005c_read1(struct gspca_dev *gspca_dev)
+{
+	int retval;
+
+	retval = usb_bulk_msg(gspca_dev->dev,
+				usb_rcvbulkpipe(gspca_dev->dev, 0x84),
+				gspca_dev->usb_buf, 1, NULL, 500);
+	if (retval < 0)
+		pr_err("read command [0x%02x] error %d\n",
+		       gspca_dev->usb_buf[0], retval);
+	return retval;
+}
+
+/* Response appears in gspca_dev->usb_buf[0] */
+static int jl2005c_read_reg(struct gspca_dev *gspca_dev, unsigned char reg)
+{
+	int retval;
+
+	static u8 instruction[2] = {0x95, 0x00};
+	/* put register to read in byte 1 */
+	instruction[1] = reg;
+	/* Send the read request */
+	retval = jl2005c_write2(gspca_dev, instruction);
+	if (retval < 0)
+		return retval;
+	retval = jl2005c_read1(gspca_dev);
+
+	return retval;
+}
+
+static int jl2005c_start_new_frame(struct gspca_dev *gspca_dev)
+{
+	int i;
+	int retval;
+	int frame_brightness = 0;
+
+	static u8 instruction[2] = {0x7f, 0x01};
+
+	retval = jl2005c_write2(gspca_dev, instruction);
+	if (retval < 0)
+		return retval;
+
+	i = 0;
+	while (i < 20 && !frame_brightness) {
+		/* If we tried 20 times, give up. */
+		retval = jl2005c_read_reg(gspca_dev, 0x7e);
+		if (retval < 0)
+			return retval;
+		frame_brightness = gspca_dev->usb_buf[0];
+		retval = jl2005c_read_reg(gspca_dev, 0x7d);
+		if (retval < 0)
+			return retval;
+		i++;
+	}
+	PDEBUG(D_FRAM, "frame_brightness is 0x%02x", gspca_dev->usb_buf[0]);
+	return retval;
+}
+
+static int jl2005c_write_reg(struct gspca_dev *gspca_dev, unsigned char reg,
+						    unsigned char value)
+{
+	int retval;
+	u8 instruction[2];
+
+	instruction[0] = reg;
+	instruction[1] = value;
+
+	retval = jl2005c_write2(gspca_dev, instruction);
+	if (retval < 0)
+			return retval;
+
+	return retval;
+}
+
+static int jl2005c_get_firmware_id(struct gspca_dev *gspca_dev)
+{
+	struct sd *sd = (struct sd *)gspca_dev;
+	int i = 0;
+	int retval = -1;
+	unsigned char regs_to_read[] = {0x57, 0x02, 0x03, 0x5d, 0x5e, 0x5f};
+
+	PDEBUG(D_PROBE, "Running jl2005c_get_firmware_id");
+	/* Read the first ID byte once for warmup */
+	retval = jl2005c_read_reg(gspca_dev, regs_to_read[0]);
+	PDEBUG(D_PROBE, "response is %02x", gspca_dev->usb_buf[0]);
+	if (retval < 0)
+		return retval;
+	/* Now actually get the ID string */
+	for (i = 0; i < 6; i++) {
+		retval = jl2005c_read_reg(gspca_dev, regs_to_read[i]);
+		if (retval < 0)
+			return retval;
+		sd->firmware_id[i] = gspca_dev->usb_buf[0];
+	}
+	PDEBUG(D_PROBE, "firmware ID is %02x%02x%02x%02x%02x%02x",
+						sd->firmware_id[0],
+						sd->firmware_id[1],
+						sd->firmware_id[2],
+						sd->firmware_id[3],
+						sd->firmware_id[4],
+						sd->firmware_id[5]);
+	return 0;
+}
+
+static int jl2005c_stream_start_vga_lg
+		    (struct gspca_dev *gspca_dev)
+{
+	int i;
+	int retval = -1;
+	static u8 instruction[][2] = {
+		{0x05, 0x00},
+		{0x7c, 0x00},
+		{0x7d, 0x18},
+		{0x02, 0x00},
+		{0x01, 0x00},
+		{0x04, 0x52},
+	};
+
+	for (i = 0; i < ARRAY_SIZE(instruction); i++) {
+		msleep(60);
+		retval = jl2005c_write2(gspca_dev, instruction[i]);
+		if (retval < 0)
+			return retval;
+	}
+	msleep(60);
+	return retval;
+}
+
+static int jl2005c_stream_start_vga_small(struct gspca_dev *gspca_dev)
+{
+	int i;
+	int retval = -1;
+	static u8 instruction[][2] = {
+		{0x06, 0x00},
+		{0x7c, 0x00},
+		{0x7d, 0x1a},
+		{0x02, 0x00},
+		{0x01, 0x00},
+		{0x04, 0x52},
+	};
+
+	for (i = 0; i < ARRAY_SIZE(instruction); i++) {
+		msleep(60);
+		retval = jl2005c_write2(gspca_dev, instruction[i]);
+		if (retval < 0)
+			return retval;
+	}
+	msleep(60);
+	return retval;
+}
+
+static int jl2005c_stream_start_cif_lg(struct gspca_dev *gspca_dev)
+{
+	int i;
+	int retval = -1;
+	static u8 instruction[][2] = {
+		{0x05, 0x00},
+		{0x7c, 0x00},
+		{0x7d, 0x30},
+		{0x02, 0x00},
+		{0x01, 0x00},
+		{0x04, 0x42},
+	};
+
+	for (i = 0; i < ARRAY_SIZE(instruction); i++) {
+		msleep(60);
+		retval = jl2005c_write2(gspca_dev, instruction[i]);
+		if (retval < 0)
+			return retval;
+	}
+	msleep(60);
+	return retval;
+}
+
+static int jl2005c_stream_start_cif_small(struct gspca_dev *gspca_dev)
+{
+	int i;
+	int retval = -1;
+	static u8 instruction[][2] = {
+		{0x06, 0x00},
+		{0x7c, 0x00},
+		{0x7d, 0x32},
+		{0x02, 0x00},
+		{0x01, 0x00},
+		{0x04, 0x42},
+	};
+
+	for (i = 0; i < ARRAY_SIZE(instruction); i++) {
+		msleep(60);
+		retval = jl2005c_write2(gspca_dev, instruction[i]);
+		if (retval < 0)
+			return retval;
+	}
+	msleep(60);
+	return retval;
+}
+
+
+static int jl2005c_stop(struct gspca_dev *gspca_dev)
+{
+	int retval;
+
+	retval = jl2005c_write_reg(gspca_dev, 0x07, 0x00);
+	return retval;
+}
+
+/* This function is called as a workqueue function and runs whenever the camera
+ * is streaming data. Because it is a workqueue function it is allowed to sleep
+ * so we can use synchronous USB calls. To avoid possible collisions with other
+ * threads attempting to use the camera's USB interface the gspca usb_lock is
+ * used when performing the one USB control operation inside the workqueue,
+ * which tells the camera to close the stream. In practice the only thing
+ * which needs to be protected against is the usb_set_interface call that
+ * gspca makes during stream_off. Otherwise the camera doesn't provide any
+ * controls that the user could try to change.
+ */
+static void jl2005c_dostream(struct work_struct *work)
+{
+	struct sd *dev = container_of(work, struct sd, work_struct);
+	struct gspca_dev *gspca_dev = &dev->gspca_dev;
+	int bytes_left = 0; /* bytes remaining in current frame. */
+	int data_len;   /* size to use for the next read. */
+	int header_read = 0;
+	unsigned char header_sig[2] = {0x4a, 0x4c};
+	int act_len;
+	int packet_type;
+	int ret;
+	u8 *buffer;
+
+	buffer = kmalloc(JL2005C_MAX_TRANSFER, GFP_KERNEL | GFP_DMA);
+	if (!buffer) {
+		pr_err("Couldn't allocate USB buffer\n");
+		goto quit_stream;
+	}
+
+	while (gspca_dev->present && gspca_dev->streaming) {
+		/* Check if this is a new frame. If so, start the frame first */
+		if (!header_read) {
+			mutex_lock(&gspca_dev->usb_lock);
+			ret = jl2005c_start_new_frame(gspca_dev);
+			mutex_unlock(&gspca_dev->usb_lock);
+			if (ret < 0)
+				goto quit_stream;
+			ret = usb_bulk_msg(gspca_dev->dev,
+				usb_rcvbulkpipe(gspca_dev->dev, 0x82),
+				buffer, JL2005C_MAX_TRANSFER, &act_len,
+				JL2005C_DATA_TIMEOUT);
+			PDEBUG(D_PACK,
+				"Got %d bytes out of %d for header",
+					act_len, JL2005C_MAX_TRANSFER);
+			if (ret < 0 || act_len < JL2005C_MAX_TRANSFER)
+				goto quit_stream;
+			/* Check whether we actually got the first block */
+			if (memcmp(header_sig, buffer, 2) != 0) {
+				pr_err("First block is not the first block\n");
+				goto quit_stream;
+			}
+			/* total size to fetch is byte 7, times blocksize
+			 * of which we already got act_len */
+			bytes_left = buffer[0x07] * dev->block_size - act_len;
+			PDEBUG(D_PACK, "bytes_left = 0x%x", bytes_left);
+			/* We keep the header. It has other information, too.*/
+			packet_type = FIRST_PACKET;
+			gspca_frame_add(gspca_dev, packet_type,
+					buffer, act_len);
+			header_read = 1;
+		}
+		while (bytes_left > 0 && gspca_dev->present) {
+			data_len = bytes_left > JL2005C_MAX_TRANSFER ?
+				JL2005C_MAX_TRANSFER : bytes_left;
+			ret = usb_bulk_msg(gspca_dev->dev,
+				usb_rcvbulkpipe(gspca_dev->dev, 0x82),
+				buffer, data_len, &act_len,
+				JL2005C_DATA_TIMEOUT);
+			if (ret < 0 || act_len < data_len)
+				goto quit_stream;
+			PDEBUG(D_PACK,
+				"Got %d bytes out of %d for frame",
+						data_len, bytes_left);
+			bytes_left -= data_len;
+			if (bytes_left == 0) {
+				packet_type = LAST_PACKET;
+				header_read = 0;
+			} else
+				packet_type = INTER_PACKET;
+			gspca_frame_add(gspca_dev, packet_type,
+					buffer, data_len);
+		}
+	}
+quit_stream:
+	if (gspca_dev->present) {
+		mutex_lock(&gspca_dev->usb_lock);
+		jl2005c_stop(gspca_dev);
+		mutex_unlock(&gspca_dev->usb_lock);
+	}
+	kfree(buffer);
+}
+
+
+
+
+/* This function is called at probe time */
+static int sd_config(struct gspca_dev *gspca_dev,
+			const struct usb_device_id *id)
+{
+	struct cam *cam;
+	struct sd *sd = (struct sd *) gspca_dev;
+
+	cam = &gspca_dev->cam;
+	/* We don't use the buffer gspca allocates so make it small. */
+	cam->bulk_size = 64;
+	cam->bulk = 1;
+	/* For the rest, the camera needs to be detected */
+	jl2005c_get_firmware_id(gspca_dev);
+	/* Here are some known firmware IDs
+	 * First some JL2005B cameras
+	 * {0x41, 0x07, 0x04, 0x2c, 0xe8, 0xf2}	Sakar KidzCam
+	 * {0x45, 0x02, 0x08, 0xb9, 0x00, 0xd2}	No-name JL2005B
+	 * JL2005C cameras
+	 * {0x01, 0x0c, 0x16, 0x10, 0xf8, 0xc8}	Argus DC-1512
+	 * {0x12, 0x04, 0x03, 0xc0, 0x00, 0xd8}	ICarly
+	 * {0x86, 0x08, 0x05, 0x02, 0x00, 0xd4}	Jazz
+	 *
+	 * Based upon this scanty evidence, we can detect a CIF camera by
+	 * testing byte 0 for 0x4x.
+	 */
+	if ((sd->firmware_id[0] & 0xf0) == 0x40) {
+		cam->cam_mode	= cif_mode;
+		cam->nmodes	= ARRAY_SIZE(cif_mode);
+		sd->block_size	= 0x80;
+	} else {
+		cam->cam_mode	= vga_mode;
+		cam->nmodes	= ARRAY_SIZE(vga_mode);
+		sd->block_size	= 0x200;
+	}
+
+	INIT_WORK(&sd->work_struct, jl2005c_dostream);
+
+	return 0;
+}
+
+/* this function is called at probe and resume time */
+static int sd_init(struct gspca_dev *gspca_dev)
+{
+	return 0;
+}
+
+static int sd_start(struct gspca_dev *gspca_dev)
+{
+
+	struct sd *sd = (struct sd *) gspca_dev;
+	sd->cap_mode = gspca_dev->cam.cam_mode;
+
+	switch (gspca_dev->width) {
+	case 640:
+		PDEBUG(D_STREAM, "Start streaming at vga resolution");
+		jl2005c_stream_start_vga_lg(gspca_dev);
+		break;
+	case 320:
+		PDEBUG(D_STREAM, "Start streaming at qvga resolution");
+		jl2005c_stream_start_vga_small(gspca_dev);
+		break;
+	case 352:
+		PDEBUG(D_STREAM, "Start streaming at cif resolution");
+		jl2005c_stream_start_cif_lg(gspca_dev);
+		break;
+	case 176:
+		PDEBUG(D_STREAM, "Start streaming at qcif resolution");
+		jl2005c_stream_start_cif_small(gspca_dev);
+		break;
+	default:
+		pr_err("Unknown resolution specified\n");
+		return -1;
+	}
+
+	/* Start the workqueue function to do the streaming */
+	sd->work_thread = create_singlethread_workqueue(MODULE_NAME);
+	queue_work(sd->work_thread, &sd->work_struct);
+
+	return 0;
+}
+
+/* called on streamoff with alt==0 and on disconnect */
+/* the usb_lock is held at entry - restore on exit */
+static void sd_stop0(struct gspca_dev *gspca_dev)
+{
+	struct sd *dev = (struct sd *) gspca_dev;
+
+	/* wait for the work queue to terminate */
+	mutex_unlock(&gspca_dev->usb_lock);
+	/* This waits for jl2005c_dostream to finish */
+	destroy_workqueue(dev->work_thread);
+	dev->work_thread = NULL;
+	mutex_lock(&gspca_dev->usb_lock);
+}
+
+
+
+/* sub-driver description */
+static const struct sd_desc sd_desc = {
+	.name = MODULE_NAME,
+	/* .ctrls = none have been detected */
+	/* .nctrls = ARRAY_SIZE(sd_ctrls),  */
+	.config = sd_config,
+	.init = sd_init,
+	.start = sd_start,
+	.stop0 = sd_stop0,
+};
+
+/* -- module initialisation -- */
+static const __devinitdata struct usb_device_id device_table[] = {
+	{USB_DEVICE(0x0979, 0x0227)},
+	{}
+};
+MODULE_DEVICE_TABLE(usb, device_table);
+
+/* -- device connect -- */
+static int sd_probe(struct usb_interface *intf,
+				const struct usb_device_id *id)
+{
+	return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd),
+				THIS_MODULE);
+}
+
+static struct usb_driver sd_driver = {
+	.name = MODULE_NAME,
+	.id_table = device_table,
+	.probe = sd_probe,
+	.disconnect = gspca_disconnect,
+#ifdef CONFIG_PM
+	.suspend = gspca_suspend,
+	.resume = gspca_resume,
+#endif
+};
+
+/* -- module insert / remove -- */
+static int __init sd_mod_init(void)
+{
+	int ret;
+
+	ret = usb_register(&sd_driver);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+static void __exit sd_mod_exit(void)
+{
+	usb_deregister(&sd_driver);
+}
+
+module_init(sd_mod_init);
+module_exit(sd_mod_exit);
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 2965906a02c9..6bfaa767a817 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -401,6 +401,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_SPCA561  v4l2_fourcc('S', '5', '6', '1') /* compressed GBRG bayer */
 #define V4L2_PIX_FMT_PAC207   v4l2_fourcc('P', '2', '0', '7') /* compressed BGGR bayer */
 #define V4L2_PIX_FMT_MR97310A v4l2_fourcc('M', '3', '1', '0') /* compressed BGGR bayer */
+#define V4L2_PIX_FMT_JL2005BCD v4l2_fourcc('J', 'L', '2', '0') /* compressed RGGB bayer */
 #define V4L2_PIX_FMT_SN9C2028 v4l2_fourcc('S', 'O', 'N', 'X') /* compressed GBRG bayer */
 #define V4L2_PIX_FMT_SQ905C   v4l2_fourcc('9', '0', '5', 'C') /* compressed RGGB bayer */
 #define V4L2_PIX_FMT_PJPG     v4l2_fourcc('P', 'J', 'P', 'G') /* Pixart 73xx JPEG */
-- 
cgit v1.2.3


From da8d1c8ba4dcb16d60be54b233deca9a7cac98dc Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Thu, 6 Oct 2011 14:08:18 -0400
Subject: PCI/sysfs: add per pci device msi[x] irq listing (v5)

This patch adds a per-pci-device subdirectory in sysfs called:
/sys/bus/pci/devices/<device>/msi_irqs

This sub-directory exports the set of msi vectors allocated by a given
pci device, by creating a numbered sub-directory for each vector beneath
msi_irqs.  For each vector various attributes can be exported.
Currently the only attribute is called mode, which tracks the
operational mode of that vector (msi vs. msix)

Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 Documentation/ABI/testing/sysfs-bus-pci |  18 ++++++
 drivers/pci/msi.c                       | 111 ++++++++++++++++++++++++++++++++
 include/linux/msi.h                     |   3 +
 include/linux/pci.h                     |   1 +
 4 files changed, 133 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 349ecf26ce10..34f51100f029 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -66,6 +66,24 @@ Description:
 		re-discover previously removed devices.
 		Depends on CONFIG_HOTPLUG.
 
+What:		/sys/bus/pci/devices/.../msi_irqs/
+Date:		September, 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		The /sys/devices/.../msi_irqs directory contains a variable set
+		of sub-directories, with each sub-directory being named after a
+		corresponding msi irq vector allocated to that device.  Each
+		numbered sub-directory N contains attributes of that irq.
+		Note that this directory is not created for device drivers which
+		do not support MSI irqs.
+
+What:		/sys/bus/pci/devices/.../msi_irqs/<N>/mode
+Date:		September 2011
+Contact:	Neil Horman <nhorman@tuxdriver.com>
+Description:
+		This attribute indicates the mode that the irq vector named by
+		the parent directory is in (msi vs. msix)
+
 What:		/sys/bus/pci/devices/.../remove
 Date:		January 2009
 Contact:	Linux PCI developers <linux-pci@vger.kernel.org>
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 0e6d04d7ba4f..e6b6b9c67023 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -323,6 +323,8 @@ static void free_msi_irqs(struct pci_dev *dev)
 			if (list_is_last(&entry->list, &dev->msi_list))
 				iounmap(entry->mask_base);
 		}
+		kobject_del(&entry->kobj);
+		kobject_put(&entry->kobj);
 		list_del(&entry->list);
 		kfree(entry);
 	}
@@ -403,6 +405,98 @@ void pci_restore_msi_state(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_restore_msi_state);
 
+
+#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
+#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)
+
+struct msi_attribute {
+	struct attribute        attr;
+	ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
+			char *buf);
+	ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
+			 const char *buf, size_t count);
+};
+
+static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
+			     char *buf)
+{
+	return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
+}
+
+static ssize_t msi_irq_attr_show(struct kobject *kobj,
+				 struct attribute *attr, char *buf)
+{
+	struct msi_attribute *attribute = to_msi_attr(attr);
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(entry, attribute, buf);
+}
+
+static const struct sysfs_ops msi_irq_sysfs_ops = {
+	.show = msi_irq_attr_show,
+};
+
+static struct msi_attribute mode_attribute =
+	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);
+
+
+struct attribute *msi_irq_default_attrs[] = {
+	&mode_attribute.attr,
+	NULL
+};
+
+void msi_kobj_release(struct kobject *kobj)
+{
+	struct msi_desc *entry = to_msi_desc(kobj);
+
+	pci_dev_put(entry->dev);
+}
+
+static struct kobj_type msi_irq_ktype = {
+	.release = msi_kobj_release,
+	.sysfs_ops = &msi_irq_sysfs_ops,
+	.default_attrs = msi_irq_default_attrs,
+};
+
+static int populate_msi_sysfs(struct pci_dev *pdev)
+{
+	struct msi_desc *entry;
+	struct kobject *kobj;
+	int ret;
+	int count = 0;
+
+	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
+	if (!pdev->msi_kset)
+		return -ENOMEM;
+
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		kobj = &entry->kobj;
+		kobj->kset = pdev->msi_kset;
+		pci_dev_get(pdev);
+		ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
+				     "%u", entry->irq);
+		if (ret)
+			goto out_unroll;
+
+		count++;
+	}
+
+	return 0;
+
+out_unroll:
+	list_for_each_entry(entry, &pdev->msi_list, list) {
+		if (!count)
+			break;
+		kobject_del(&entry->kobj);
+		kobject_put(&entry->kobj);
+		count--;
+	}
+	return ret;
+}
+
 /**
  * msi_capability_init - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
@@ -454,6 +548,13 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 		return ret;
 	}
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		msi_mask_irq(entry, mask, ~mask);
+		free_msi_irqs(dev);
+		return ret;
+	}
+
 	/* Set MSI enabled bits	 */
 	pci_intx_for_msi(dev, 0);
 	msi_set_enable(dev, pos, 1);
@@ -574,6 +675,12 @@ static int msix_capability_init(struct pci_dev *dev,
 
 	msix_program_entries(dev, entries);
 
+	ret = populate_msi_sysfs(dev);
+	if (ret) {
+		ret = 0;
+		goto error;
+	}
+
 	/* Set MSI-X enabled bits and unmask the function */
 	pci_intx_for_msi(dev, 0);
 	dev->msix_enabled = 1;
@@ -732,6 +839,8 @@ void pci_disable_msi(struct pci_dev *dev)
 
 	pci_msi_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msi);
 
@@ -830,6 +939,8 @@ void pci_disable_msix(struct pci_dev *dev)
 
 	pci_msix_shutdown(dev);
 	free_msi_irqs(dev);
+	kset_unregister(dev->msi_kset);
+	dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msix);
 
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 05acced439a3..ce93a341337d 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -1,6 +1,7 @@
 #ifndef LINUX_MSI_H
 #define LINUX_MSI_H
 
+#include <linux/kobject.h>
 #include <linux/list.h>
 
 struct msi_msg {
@@ -44,6 +45,8 @@ struct msi_desc {
 
 	/* Last set MSI message */
 	struct msi_msg msg;
+
+	struct kobject kobj;
 };
 
 /*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7cda65b5f798..84225c756bd1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -336,6 +336,7 @@ struct pci_dev {
 	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
 #ifdef CONFIG_PCI_MSI
 	struct list_head msi_list;
+	struct kset *msi_kset;
 #endif
 	struct pci_vpd *vpd;
 #ifdef CONFIG_PCI_ATS
-- 
cgit v1.2.3


From cfa4d8cc56853ec945956d182ecb4c99102b110a Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 2 Nov 2011 14:07:15 -0600
Subject: PCI: Fix PRI and PASID consistency

These are extended capabilities, rename and move to proper
group for consistency.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/ats.c        | 20 ++++++++++----------
 include/linux/pci_regs.h |  4 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index b0dd08e6a9da..e11ebafaf774 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -175,7 +175,7 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
 	u32 max_requests;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
 
@@ -206,7 +206,7 @@ void pci_disable_pri(struct pci_dev *pdev)
 	u16 control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return;
 
@@ -227,7 +227,7 @@ bool pci_pri_enabled(struct pci_dev *pdev)
 	u16 control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return false;
 
@@ -249,7 +249,7 @@ int pci_reset_pri(struct pci_dev *pdev)
 	u16 control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
 
@@ -282,7 +282,7 @@ bool pci_pri_stopped(struct pci_dev *pdev)
 	u16 control, status;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return true;
 
@@ -311,7 +311,7 @@ int pci_pri_status(struct pci_dev *pdev)
 	u16 status, control;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
 	if (!pos)
 		return -EINVAL;
 
@@ -342,7 +342,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 	u16 control, supported;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
 
@@ -376,7 +376,7 @@ void pci_disable_pasid(struct pci_dev *pdev)
 	u16 control = 0;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return;
 
@@ -400,7 +400,7 @@ int pci_pasid_features(struct pci_dev *pdev)
 	u16 supported;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
 
@@ -426,7 +426,7 @@ int pci_max_pasids(struct pci_dev *pdev)
 	u16 supported;
 	int pos;
 
-	pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
 
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index b5d9657f3100..090d3a9f5b26 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -537,7 +537,9 @@
 #define PCI_EXT_CAP_ID_ARI	14
 #define PCI_EXT_CAP_ID_ATS	15
 #define PCI_EXT_CAP_ID_SRIOV	16
+#define PCI_EXT_CAP_ID_PRI	19
 #define PCI_EXT_CAP_ID_LTR	24
+#define PCI_EXT_CAP_ID_PASID	27
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS	4	/* Uncorrectable Error Status */
@@ -664,7 +666,6 @@
 #define  PCI_ATS_MIN_STU	12	/* shift of minimum STU block */
 
 /* Page Request Interface */
-#define PCI_PRI_CAP		0x13    /* PRI capability ID */
 #define PCI_PRI_CONTROL_OFF	0x04	/* Offset of control register */
 #define PCI_PRI_STATUS_OFF	0x06	/* Offset of status register */
 #define PCI_PRI_ENABLE		0x0001	/* Enable mask */
@@ -676,7 +677,6 @@
 #define PCI_PRI_ALLOC_REQ_OFF	0x0c	/* Cap offset for max reqs allowed */
 
 /* PASID capability */
-#define PCI_PASID_CAP		0x1b    /* PASID capability ID */
 #define PCI_PASID_CAP_OFF	0x04    /* PASID feature register */
 #define PCI_PASID_CONTROL_OFF   0x06    /* PASID control register */
 #define PCI_PASID_ENABLE	0x01	/* Enable/Supported bit */
-- 
cgit v1.2.3


From 10f6dc7eede9a8895626e9c1b4f2c3b75fbf2850 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Thu, 10 Nov 2011 16:38:33 -0500
Subject: PCI: Rework ASPM disable code

Right now we forcibly clear ASPM state on all devices if the BIOS indicates
that the feature isn't supported. Based on the Microsoft presentation
"PCI Express In Depth for Windows Vista and Beyond", I'm starting to think
that this may be an error. The implication is that unless the platform
grants full control via _OSC, Windows will not touch any PCIe features -
including ASPM. In that case clearing ASPM state would be an error unless
the platform has granted us that control.

This patch reworks the ASPM disabling code such that the actual clearing
of state is triggered by a successful handoff of PCIe control to the OS.
The general ASPM code undergoes some changes in order to ensure that the
ability to clear the bits isn't overridden by ASPM having already been
disabled. Further, this theoretically now allows for situations where
only a subset of PCIe roots hand over control, leaving the others in the
BIOS state.

It's difficult to know for sure that this is the right thing to do -
there's zero public documentation on the interaction between all of these
components. But enough vendors enable ASPM on platforms and then set this
bit that it seems likely that they're expecting the OS to leave them alone.

Measured to save around 5W on an idle Thinkpad X220.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/acpi/pci_root.c  |  7 ++++++
 drivers/pci/pci-acpi.c   |  1 -
 drivers/pci/pcie/aspm.c  | 58 ++++++++++++++++++++++++++++++------------------
 include/linux/pci-aspm.h |  4 ++--
 4 files changed, 46 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 2672c798272f..7aff6312ce7c 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -596,6 +596,13 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
 		if (ACPI_SUCCESS(status)) {
 			dev_info(root->bus->bridge,
 				"ACPI _OSC control (0x%02x) granted\n", flags);
+			if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
+				/*
+				 * We have ASPM control, but the FADT indicates
+				 * that it's unsupported. Clear it.
+				 */
+				pcie_clear_aspm(root->bus);
+			}
 		} else {
 			dev_info(root->bus->bridge,
 				"ACPI _OSC request failed (%s), "
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 4ecb6408b0d6..c8e75851a314 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -395,7 +395,6 @@ static int __init acpi_pci_init(void)
 
 	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
 		printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
-		pcie_clear_aspm();
 		pcie_no_aspm();
 	}
 
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index cbfbab18be91..1cfbf228fbb1 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -68,7 +68,7 @@ struct pcie_link_state {
 	struct aspm_latency acceptable[8];
 };
 
-static int aspm_disabled, aspm_force, aspm_clear_state;
+static int aspm_disabled, aspm_force;
 static bool aspm_support_enabled = true;
 static DEFINE_MUTEX(aspm_lock);
 static LIST_HEAD(link_list);
@@ -500,9 +500,6 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
 	int pos;
 	u32 reg32;
 
-	if (aspm_clear_state)
-		return -EINVAL;
-
 	/*
 	 * Some functions in a slot might not all be PCIe functions,
 	 * very strange. Disable ASPM for the whole slot
@@ -574,9 +571,6 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
 	    pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
 		return;
 
-	if (aspm_disabled && !aspm_clear_state)
-		return;
-
 	/* VIA has a strange chipset, root port is under a bridge */
 	if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT &&
 	    pdev->bus->self)
@@ -608,7 +602,7 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
 	 * the BIOS's expectation, we'll do so once pci_enable_device() is
 	 * called.
 	 */
-	if (aspm_policy != POLICY_POWERSAVE || aspm_clear_state) {
+	if (aspm_policy != POLICY_POWERSAVE) {
 		pcie_config_aspm_path(link);
 		pcie_set_clkpm(link, policy_to_clkpm_state(link));
 	}
@@ -649,8 +643,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
 	struct pci_dev *parent = pdev->bus->self;
 	struct pcie_link_state *link, *root, *parent_link;
 
-	if ((aspm_disabled && !aspm_clear_state) || !pci_is_pcie(pdev) ||
-	    !parent || !parent->link_state)
+	if (!pci_is_pcie(pdev) || !parent || !parent->link_state)
 		return;
 	if ((parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT) &&
 	    (parent->pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
@@ -734,13 +727,18 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
  * pci_disable_link_state - disable pci device's link state, so the link will
  * never enter specific states
  */
-static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
+static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem,
+				     bool force)
 {
 	struct pci_dev *parent = pdev->bus->self;
 	struct pcie_link_state *link;
 
-	if (aspm_disabled || !pci_is_pcie(pdev))
+	if (aspm_disabled && !force)
+		return;
+
+	if (!pci_is_pcie(pdev))
 		return;
+
 	if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
 	    pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)
 		parent = pdev;
@@ -768,16 +766,31 @@ static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
 
 void pci_disable_link_state_locked(struct pci_dev *pdev, int state)
 {
-	__pci_disable_link_state(pdev, state, false);
+	__pci_disable_link_state(pdev, state, false, false);
 }
 EXPORT_SYMBOL(pci_disable_link_state_locked);
 
 void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
-	__pci_disable_link_state(pdev, state, true);
+	__pci_disable_link_state(pdev, state, true, false);
 }
 EXPORT_SYMBOL(pci_disable_link_state);
 
+void pcie_clear_aspm(struct pci_bus *bus)
+{
+	struct pci_dev *child;
+
+	/*
+	 * Clear any ASPM setup that the firmware has carried out on this bus
+	 */
+	list_for_each_entry(child, &bus->devices, bus_list) {
+		__pci_disable_link_state(child, PCIE_LINK_STATE_L0S |
+					 PCIE_LINK_STATE_L1 |
+					 PCIE_LINK_STATE_CLKPM,
+					 false, true);
+	}
+}
+
 static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
 {
 	int i;
@@ -935,6 +948,7 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
 static int __init pcie_aspm_disable(char *str)
 {
 	if (!strcmp(str, "off")) {
+		aspm_policy = POLICY_DEFAULT;
 		aspm_disabled = 1;
 		aspm_support_enabled = false;
 		printk(KERN_INFO "PCIe ASPM is disabled\n");
@@ -947,16 +961,18 @@ static int __init pcie_aspm_disable(char *str)
 
 __setup("pcie_aspm=", pcie_aspm_disable);
 
-void pcie_clear_aspm(void)
-{
-	if (!aspm_force)
-		aspm_clear_state = 1;
-}
-
 void pcie_no_aspm(void)
 {
-	if (!aspm_force)
+	/*
+	 * Disabling ASPM is intended to prevent the kernel from modifying
+	 * existing hardware state, not to clear existing state. To that end:
+	 * (a) set policy to POLICY_DEFAULT in order to avoid changing state
+	 * (b) prevent userspace from changing policy
+	 */
+	if (!aspm_force) {
+		aspm_policy = POLICY_DEFAULT;
 		aspm_disabled = 1;
+	}
 }
 
 /**
diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h
index 7cea7b6c1413..c8320144fe79 100644
--- a/include/linux/pci-aspm.h
+++ b/include/linux/pci-aspm.h
@@ -29,7 +29,7 @@ extern void pcie_aspm_pm_state_change(struct pci_dev *pdev);
 extern void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
 extern void pci_disable_link_state(struct pci_dev *pdev, int state);
 extern void pci_disable_link_state_locked(struct pci_dev *pdev, int state);
-extern void pcie_clear_aspm(void);
+extern void pcie_clear_aspm(struct pci_bus *bus);
 extern void pcie_no_aspm(void);
 #else
 static inline void pcie_aspm_init_link_state(struct pci_dev *pdev)
@@ -47,7 +47,7 @@ static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
 static inline void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
 }
-static inline void pcie_clear_aspm(void)
+static inline void pcie_clear_aspm(struct pci_bus *bus)
 {
 }
 static inline void pcie_no_aspm(void)
-- 
cgit v1.2.3


From 1830ea91c20b06608f7cdb2455ce05ba834b3214 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 16 Nov 2011 09:24:16 -0700
Subject: PCI: Fix PCI_EXP_TYPE_RC_EC value

Spec shows this as 1010b = 0xa

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci_regs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index 090d3a9f5b26..9e37250a2a22 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -392,7 +392,7 @@
 #define  PCI_EXP_TYPE_DOWNSTREAM 0x6	/* Downstream Port */
 #define  PCI_EXP_TYPE_PCI_BRIDGE 0x7	/* PCI/PCI-X Bridge */
 #define  PCI_EXP_TYPE_RC_END	0x9	/* Root Complex Integrated Endpoint */
-#define  PCI_EXP_TYPE_RC_EC	0x10	/* Root Complex Event Collector */
+#define  PCI_EXP_TYPE_RC_EC	0xa	/* Root Complex Event Collector */
 #define PCI_EXP_FLAGS_SLOT	0x0100	/* Slot implemented */
 #define PCI_EXP_FLAGS_IRQ	0x3e00	/* Interrupt message number */
 #define PCI_EXP_DEVCAP		4	/* Device capabilities */
-- 
cgit v1.2.3


From fb51ccbf217c1c994607b6519c7d85250928553d Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 4 Nov 2011 09:45:59 +0100
Subject: PCI: Rework config space blocking services

pci_block_user_cfg_access was designed for the use case that a single
context, the IPR driver, temporarily delays user space accesses to the
config space via sysfs. This assumption became invalid by the time
pci_dev_reset was added as a locking instance. Today, if you run two loops
in parallel that reset the same device via sysfs, you end up with a
kernel BUG as pci_block_user_cfg_access detects the broken assumption.

This reworks the pci_block_user_cfg_access to a sleeping service
pci_cfg_access_lock and an atomic-compatible variant called
pci_cfg_access_trylock. The former not only blocks user space access as
before but also waits if access was already locked. The latter service
just returns false in this case, allowing the caller to resolve the
conflict instead of raising a BUG.

Adaptions of the ipr driver were originally written by Brian King.

Acked-by: Brian King <brking@linux.vnet.ibm.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/access.c          | 74 ++++++++++++++++++++++++++++---------------
 drivers/pci/iov.c             | 12 +++----
 drivers/pci/pci.c             |  4 +--
 drivers/scsi/ipr.c            | 67 +++++++++++++++++++++++++++++++++++----
 drivers/scsi/ipr.h            |  1 +
 drivers/uio/uio_pci_generic.c |  9 +++---
 include/linux/pci.h           | 14 +++++---
 7 files changed, 131 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index fdaa42aac7c6..0c4c71712dfc 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -127,20 +127,20 @@ EXPORT_SYMBOL(pci_write_vpd);
  * We have a bit per device to indicate it's blocked and a global wait queue
  * for callers to sleep on until devices are unblocked.
  */
-static DECLARE_WAIT_QUEUE_HEAD(pci_ucfg_wait);
+static DECLARE_WAIT_QUEUE_HEAD(pci_cfg_wait);
 
-static noinline void pci_wait_ucfg(struct pci_dev *dev)
+static noinline void pci_wait_cfg(struct pci_dev *dev)
 {
 	DECLARE_WAITQUEUE(wait, current);
 
-	__add_wait_queue(&pci_ucfg_wait, &wait);
+	__add_wait_queue(&pci_cfg_wait, &wait);
 	do {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		raw_spin_unlock_irq(&pci_lock);
 		schedule();
 		raw_spin_lock_irq(&pci_lock);
-	} while (dev->block_ucfg_access);
-	__remove_wait_queue(&pci_ucfg_wait, &wait);
+	} while (dev->block_cfg_access);
+	__remove_wait_queue(&pci_cfg_wait, &wait);
 }
 
 /* Returns 0 on success, negative values indicate error. */
@@ -153,7 +153,8 @@ int pci_user_read_config_##size						\
 	if (PCI_##size##_BAD)						\
 		return -EINVAL;						\
 	raw_spin_lock_irq(&pci_lock);				\
-	if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev);	\
+	if (unlikely(dev->block_cfg_access))				\
+		pci_wait_cfg(dev);					\
 	ret = dev->bus->ops->read(dev->bus, dev->devfn,			\
 					pos, sizeof(type), &data);	\
 	raw_spin_unlock_irq(&pci_lock);				\
@@ -172,7 +173,8 @@ int pci_user_write_config_##size					\
 	if (PCI_##size##_BAD)						\
 		return -EINVAL;						\
 	raw_spin_lock_irq(&pci_lock);				\
-	if (unlikely(dev->block_ucfg_access)) pci_wait_ucfg(dev);	\
+	if (unlikely(dev->block_cfg_access))				\
+		pci_wait_cfg(dev);					\
 	ret = dev->bus->ops->write(dev->bus, dev->devfn,		\
 					pos, sizeof(type), val);	\
 	raw_spin_unlock_irq(&pci_lock);				\
@@ -401,36 +403,56 @@ int pci_vpd_truncate(struct pci_dev *dev, size_t size)
 EXPORT_SYMBOL(pci_vpd_truncate);
 
 /**
- * pci_block_user_cfg_access - Block userspace PCI config reads/writes
+ * pci_cfg_access_lock - Lock PCI config reads/writes
  * @dev:	pci device struct
  *
- * When user access is blocked, any reads or writes to config space will
- * sleep until access is unblocked again.  We don't allow nesting of
- * block/unblock calls.
+ * When access is locked, any userspace reads or writes to config
+ * space and concurrent lock requests will sleep until access is
+ * allowed via pci_cfg_access_unlock again.
  */
-void pci_block_user_cfg_access(struct pci_dev *dev)
+void pci_cfg_access_lock(struct pci_dev *dev)
+{
+	might_sleep();
+
+	raw_spin_lock_irq(&pci_lock);
+	if (dev->block_cfg_access)
+		pci_wait_cfg(dev);
+	dev->block_cfg_access = 1;
+	raw_spin_unlock_irq(&pci_lock);
+}
+EXPORT_SYMBOL_GPL(pci_cfg_access_lock);
+
+/**
+ * pci_cfg_access_trylock - try to lock PCI config reads/writes
+ * @dev:	pci device struct
+ *
+ * Same as pci_cfg_access_lock, but will return 0 if access is
+ * already locked, 1 otherwise. This function can be used from
+ * atomic contexts.
+ */
+bool pci_cfg_access_trylock(struct pci_dev *dev)
 {
 	unsigned long flags;
-	int was_blocked;
+	bool locked = true;
 
 	raw_spin_lock_irqsave(&pci_lock, flags);
-	was_blocked = dev->block_ucfg_access;
-	dev->block_ucfg_access = 1;
+	if (dev->block_cfg_access)
+		locked = false;
+	else
+		dev->block_cfg_access = 1;
 	raw_spin_unlock_irqrestore(&pci_lock, flags);
 
-	/* If we BUG() inside the pci_lock, we're guaranteed to hose
-	 * the machine */
-	BUG_ON(was_blocked);
+	return locked;
 }
-EXPORT_SYMBOL_GPL(pci_block_user_cfg_access);
+EXPORT_SYMBOL_GPL(pci_cfg_access_trylock);
 
 /**
- * pci_unblock_user_cfg_access - Unblock userspace PCI config reads/writes
+ * pci_cfg_access_unlock - Unlock PCI config reads/writes
  * @dev:	pci device struct
  *
- * This function allows userspace PCI config accesses to resume.
+ * This function allows PCI config accesses to resume.
  */
-void pci_unblock_user_cfg_access(struct pci_dev *dev)
+void pci_cfg_access_unlock(struct pci_dev *dev)
 {
 	unsigned long flags;
 
@@ -438,10 +460,10 @@ void pci_unblock_user_cfg_access(struct pci_dev *dev)
 
 	/* This indicates a problem in the caller, but we don't need
 	 * to kill them, unlike a double-block above. */
-	WARN_ON(!dev->block_ucfg_access);
+	WARN_ON(!dev->block_cfg_access);
 
-	dev->block_ucfg_access = 0;
-	wake_up_all(&pci_ucfg_wait);
+	dev->block_cfg_access = 0;
+	wake_up_all(&pci_cfg_wait);
 	raw_spin_unlock_irqrestore(&pci_lock, flags);
 }
-EXPORT_SYMBOL_GPL(pci_unblock_user_cfg_access);
+EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 1969a3ee3058..6a4d70386a3d 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -348,10 +348,10 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
 	}
 
 	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
-	pci_block_user_cfg_access(dev);
+	pci_cfg_access_lock(dev);
 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
 	msleep(100);
-	pci_unblock_user_cfg_access(dev);
+	pci_cfg_access_unlock(dev);
 
 	iov->initial = initial;
 	if (nr_virtfn < initial)
@@ -379,10 +379,10 @@ failed:
 		virtfn_remove(dev, j, 0);
 
 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
-	pci_block_user_cfg_access(dev);
+	pci_cfg_access_lock(dev);
 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
 	ssleep(1);
-	pci_unblock_user_cfg_access(dev);
+	pci_cfg_access_unlock(dev);
 
 	if (iov->link != dev->devfn)
 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
@@ -405,10 +405,10 @@ static void sriov_disable(struct pci_dev *dev)
 		virtfn_remove(dev, i, 0);
 
 	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
-	pci_block_user_cfg_access(dev);
+	pci_cfg_access_lock(dev);
 	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
 	ssleep(1);
-	pci_unblock_user_cfg_access(dev);
+	pci_cfg_access_unlock(dev);
 
 	if (iov->link != dev->devfn)
 		sysfs_remove_link(&dev->dev.kobj, "dep_link");
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 6d4a5319148d..c3cca7cdc6e5 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2965,7 +2965,7 @@ static int pci_dev_reset(struct pci_dev *dev, int probe)
 	might_sleep();
 
 	if (!probe) {
-		pci_block_user_cfg_access(dev);
+		pci_cfg_access_lock(dev);
 		/* block PM suspend, driver probe, etc. */
 		device_lock(&dev->dev);
 	}
@@ -2990,7 +2990,7 @@ static int pci_dev_reset(struct pci_dev *dev, int probe)
 done:
 	if (!probe) {
 		device_unlock(&dev->dev);
-		pci_unblock_user_cfg_access(dev);
+		pci_cfg_access_unlock(dev);
 	}
 
 	return rc;
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index fd860d952b28..67b169b7a5be 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -7638,8 +7638,12 @@ static int ipr_reset_restore_cfg_space(struct ipr_cmnd *ipr_cmd)
  **/
 static int ipr_reset_bist_done(struct ipr_cmnd *ipr_cmd)
 {
+	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
+
 	ENTER;
-	pci_unblock_user_cfg_access(ipr_cmd->ioa_cfg->pdev);
+	if (ioa_cfg->cfg_locked)
+		pci_cfg_access_unlock(ioa_cfg->pdev);
+	ioa_cfg->cfg_locked = 0;
 	ipr_cmd->job_step = ipr_reset_restore_cfg_space;
 	LEAVE;
 	return IPR_RC_JOB_CONTINUE;
@@ -7660,8 +7664,6 @@ static int ipr_reset_start_bist(struct ipr_cmnd *ipr_cmd)
 	int rc = PCIBIOS_SUCCESSFUL;
 
 	ENTER;
-	pci_block_user_cfg_access(ioa_cfg->pdev);
-
 	if (ioa_cfg->ipr_chip->bist_method == IPR_MMIO)
 		writel(IPR_UPROCI_SIS64_START_BIST,
 		       ioa_cfg->regs.set_uproc_interrupt_reg32);
@@ -7673,7 +7675,9 @@ static int ipr_reset_start_bist(struct ipr_cmnd *ipr_cmd)
 		ipr_reset_start_timer(ipr_cmd, IPR_WAIT_FOR_BIST_TIMEOUT);
 		rc = IPR_RC_JOB_RETURN;
 	} else {
-		pci_unblock_user_cfg_access(ipr_cmd->ioa_cfg->pdev);
+		if (ioa_cfg->cfg_locked)
+			pci_cfg_access_unlock(ipr_cmd->ioa_cfg->pdev);
+		ioa_cfg->cfg_locked = 0;
 		ipr_cmd->s.ioasa.hdr.ioasc = cpu_to_be32(IPR_IOASC_PCI_ACCESS_ERROR);
 		rc = IPR_RC_JOB_CONTINUE;
 	}
@@ -7716,7 +7720,6 @@ static int ipr_reset_slot_reset(struct ipr_cmnd *ipr_cmd)
 	struct pci_dev *pdev = ioa_cfg->pdev;
 
 	ENTER;
-	pci_block_user_cfg_access(pdev);
 	pci_set_pcie_reset_state(pdev, pcie_warm_reset);
 	ipr_cmd->job_step = ipr_reset_slot_reset_done;
 	ipr_reset_start_timer(ipr_cmd, IPR_PCI_RESET_TIMEOUT);
@@ -7724,6 +7727,56 @@ static int ipr_reset_slot_reset(struct ipr_cmnd *ipr_cmd)
 	return IPR_RC_JOB_RETURN;
 }
 
+/**
+ * ipr_reset_block_config_access_wait - Wait for permission to block config access
+ * @ipr_cmd:	ipr command struct
+ *
+ * Description: This attempts to block config access to the IOA.
+ *
+ * Return value:
+ * 	IPR_RC_JOB_CONTINUE / IPR_RC_JOB_RETURN
+ **/
+static int ipr_reset_block_config_access_wait(struct ipr_cmnd *ipr_cmd)
+{
+	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
+	int rc = IPR_RC_JOB_CONTINUE;
+
+	if (pci_cfg_access_trylock(ioa_cfg->pdev)) {
+		ioa_cfg->cfg_locked = 1;
+		ipr_cmd->job_step = ioa_cfg->reset;
+	} else {
+		if (ipr_cmd->u.time_left) {
+			rc = IPR_RC_JOB_RETURN;
+			ipr_cmd->u.time_left -= IPR_CHECK_FOR_RESET_TIMEOUT;
+			ipr_reset_start_timer(ipr_cmd,
+					      IPR_CHECK_FOR_RESET_TIMEOUT);
+		} else {
+			ipr_cmd->job_step = ioa_cfg->reset;
+			dev_err(&ioa_cfg->pdev->dev,
+				"Timed out waiting to lock config access. Resetting anyway.\n");
+		}
+	}
+
+	return rc;
+}
+
+/**
+ * ipr_reset_block_config_access - Block config access to the IOA
+ * @ipr_cmd:	ipr command struct
+ *
+ * Description: This attempts to block config access to the IOA
+ *
+ * Return value:
+ * 	IPR_RC_JOB_CONTINUE
+ **/
+static int ipr_reset_block_config_access(struct ipr_cmnd *ipr_cmd)
+{
+	ipr_cmd->ioa_cfg->cfg_locked = 0;
+	ipr_cmd->job_step = ipr_reset_block_config_access_wait;
+	ipr_cmd->u.time_left = IPR_WAIT_FOR_RESET_TIMEOUT;
+	return IPR_RC_JOB_CONTINUE;
+}
+
 /**
  * ipr_reset_allowed - Query whether or not IOA can be reset
  * @ioa_cfg:	ioa config struct
@@ -7763,7 +7816,7 @@ static int ipr_reset_wait_to_start_bist(struct ipr_cmnd *ipr_cmd)
 		ipr_cmd->u.time_left -= IPR_CHECK_FOR_RESET_TIMEOUT;
 		ipr_reset_start_timer(ipr_cmd, IPR_CHECK_FOR_RESET_TIMEOUT);
 	} else {
-		ipr_cmd->job_step = ioa_cfg->reset;
+		ipr_cmd->job_step = ipr_reset_block_config_access;
 		rc = IPR_RC_JOB_CONTINUE;
 	}
 
@@ -7796,7 +7849,7 @@ static int ipr_reset_alert(struct ipr_cmnd *ipr_cmd)
 		writel(IPR_UPROCI_RESET_ALERT, ioa_cfg->regs.set_uproc_interrupt_reg32);
 		ipr_cmd->job_step = ipr_reset_wait_to_start_bist;
 	} else {
-		ipr_cmd->job_step = ioa_cfg->reset;
+		ipr_cmd->job_step = ipr_reset_block_config_access;
 	}
 
 	ipr_cmd->u.time_left = IPR_WAIT_FOR_RESET_TIMEOUT;
diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index ac84736c1b9c..b13f9cc12279 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -1387,6 +1387,7 @@ struct ipr_ioa_cfg {
 	u8 msi_received:1;
 	u8 sis64:1;
 	u8 dump_timeout:1;
+	u8 cfg_locked:1;
 
 	u8 revid;
 
diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
index 02bd47bdee1c..56d00c6258f0 100644
--- a/drivers/uio/uio_pci_generic.c
+++ b/drivers/uio/uio_pci_generic.c
@@ -55,7 +55,8 @@ static irqreturn_t irqhandler(int irq, struct uio_info *info)
 	BUILD_BUG_ON(PCI_COMMAND % 4);
 	BUILD_BUG_ON(PCI_COMMAND + 2 != PCI_STATUS);
 
-	pci_block_user_cfg_access(pdev);
+	if (!pci_cfg_access_trylock(pdev))
+		return IRQ_NONE;
 
 	/* Read both command and status registers in a single 32-bit operation.
 	 * Note: we could cache the value for command and move the status read
@@ -79,7 +80,7 @@ static irqreturn_t irqhandler(int irq, struct uio_info *info)
 	ret = IRQ_HANDLED;
 done:
 
-	pci_unblock_user_cfg_access(pdev);
+	pci_cfg_access_unlock(pdev);
 	return ret;
 }
 
@@ -91,7 +92,7 @@ static int __devinit verify_pci_2_3(struct pci_dev *pdev)
 	u16 orig, new;
 	int err = 0;
 
-	pci_block_user_cfg_access(pdev);
+	pci_cfg_access_lock(pdev);
 	pci_read_config_word(pdev, PCI_COMMAND, &orig);
 	pci_write_config_word(pdev, PCI_COMMAND,
 			      orig ^ PCI_COMMAND_INTX_DISABLE);
@@ -114,7 +115,7 @@ static int __devinit verify_pci_2_3(struct pci_dev *pdev)
 	/* Now restore the original value. */
 	pci_write_config_word(pdev, PCI_COMMAND, orig);
 err:
-	pci_unblock_user_cfg_access(pdev);
+	pci_cfg_access_unlock(pdev);
 	return err;
 }
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 84225c756bd1..72401596b2a8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -308,7 +308,7 @@ struct pci_dev {
 	unsigned int	is_added:1;
 	unsigned int	is_busmaster:1; /* device is busmaster */
 	unsigned int	no_msi:1;	/* device may not use msi */
-	unsigned int	block_ucfg_access:1;	/* userspace config space access is blocked */
+	unsigned int	block_cfg_access:1;	/* config space access is blocked */
 	unsigned int	broken_parity_status:1;	/* Device generates false positive parity */
 	unsigned int	irq_reroute_variant:2;	/* device needs IRQ rerouting variant */
 	unsigned int 	msi_enabled:1;
@@ -1085,8 +1085,9 @@ int  ht_create_irq(struct pci_dev *dev, int idx);
 void ht_destroy_irq(unsigned int irq);
 #endif /* CONFIG_HT_IRQ */
 
-extern void pci_block_user_cfg_access(struct pci_dev *dev);
-extern void pci_unblock_user_cfg_access(struct pci_dev *dev);
+extern void pci_cfg_access_lock(struct pci_dev *dev);
+extern bool pci_cfg_access_trylock(struct pci_dev *dev);
+extern void pci_cfg_access_unlock(struct pci_dev *dev);
 
 /*
  * PCI domain support.  Sometimes called PCI segment (eg by ACPI),
@@ -1283,10 +1284,13 @@ static inline void pci_release_regions(struct pci_dev *dev)
 
 #define pci_dma_burst_advice(pdev, strat, strategy_parameter) do { } while (0)
 
-static inline void pci_block_user_cfg_access(struct pci_dev *dev)
+static inline void pci_cfg_access_lock(struct pci_dev *dev)
 { }
 
-static inline void pci_unblock_user_cfg_access(struct pci_dev *dev)
+static inline bool pci_cfg_access_trylock(struct pci_dev *dev)
+{ return true; }
+
+static inline void pci_cfg_access_unlock(struct pci_dev *dev)
 { }
 
 static inline struct pci_bus *pci_find_next_bus(const struct pci_bus *from)
-- 
cgit v1.2.3


From a2e27787f893621c5a6b865acf6b7766f8671328 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 4 Nov 2011 09:46:00 +0100
Subject: PCI: Introduce INTx check & mask API

These new PCI services allow probing for 2.3-compliant INTx masking
support and then using the feature from PCI interrupt handlers. The
services are properly synchronized with concurrent config space access
via sysfs or on device reset.

This enables generic PCI device drivers like uio_pci_generic or KVM's
device assignment to implement the necessary kernel-side IRQ handling
without any knowledge about device-specific interrupt status and control
registers.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/access.c |   2 +-
 drivers/pci/pci.c    | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/pci.h    |   2 +
 include/linux/pci.h  |   3 ++
 4 files changed, 116 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 0c4c71712dfc..2a581642c237 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -13,7 +13,7 @@
  * configuration space.
  */
 
-static DEFINE_RAW_SPINLOCK(pci_lock);
+DEFINE_RAW_SPINLOCK(pci_lock);
 
 /*
  *  Wrappers for all PCI configuration access functions.  They just check
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c3cca7cdc6e5..924193ef4fe1 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2767,6 +2767,116 @@ pci_intx(struct pci_dev *pdev, int enable)
 	}
 }
 
+/**
+ * pci_intx_mask_supported - probe for INTx masking support
+ * @dev: the PCI device to operate on
+ *
+ * Check if the device dev supports INTx masking via the config space
+ * command word.
+ */
+bool pci_intx_mask_supported(struct pci_dev *dev)
+{
+	bool mask_supported = false;
+	u16 orig, new;
+
+	pci_cfg_access_lock(dev);
+
+	pci_read_config_word(dev, PCI_COMMAND, &orig);
+	pci_write_config_word(dev, PCI_COMMAND,
+			      orig ^ PCI_COMMAND_INTX_DISABLE);
+	pci_read_config_word(dev, PCI_COMMAND, &new);
+
+	/*
+	 * There's no way to protect against hardware bugs or detect them
+	 * reliably, but as long as we know what the value should be, let's
+	 * go ahead and check it.
+	 */
+	if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) {
+		dev_err(&dev->dev, "Command register changed from "
+			"0x%x to 0x%x: driver or hardware bug?\n", orig, new);
+	} else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) {
+		mask_supported = true;
+		pci_write_config_word(dev, PCI_COMMAND, orig);
+	}
+
+	pci_cfg_access_unlock(dev);
+	return mask_supported;
+}
+EXPORT_SYMBOL_GPL(pci_intx_mask_supported);
+
+static bool pci_check_and_set_intx_mask(struct pci_dev *dev, bool mask)
+{
+	struct pci_bus *bus = dev->bus;
+	bool mask_updated = true;
+	u32 cmd_status_dword;
+	u16 origcmd, newcmd;
+	unsigned long flags;
+	bool irq_pending;
+
+	/*
+	 * We do a single dword read to retrieve both command and status.
+	 * Document assumptions that make this possible.
+	 */
+	BUILD_BUG_ON(PCI_COMMAND % 4);
+	BUILD_BUG_ON(PCI_COMMAND + 2 != PCI_STATUS);
+
+	raw_spin_lock_irqsave(&pci_lock, flags);
+
+	bus->ops->read(bus, dev->devfn, PCI_COMMAND, 4, &cmd_status_dword);
+
+	irq_pending = (cmd_status_dword >> 16) & PCI_STATUS_INTERRUPT;
+
+	/*
+	 * Check interrupt status register to see whether our device
+	 * triggered the interrupt (when masking) or the next IRQ is
+	 * already pending (when unmasking).
+	 */
+	if (mask != irq_pending) {
+		mask_updated = false;
+		goto done;
+	}
+
+	origcmd = cmd_status_dword;
+	newcmd = origcmd & ~PCI_COMMAND_INTX_DISABLE;
+	if (mask)
+		newcmd |= PCI_COMMAND_INTX_DISABLE;
+	if (newcmd != origcmd)
+		bus->ops->write(bus, dev->devfn, PCI_COMMAND, 2, newcmd);
+
+done:
+	raw_spin_unlock_irqrestore(&pci_lock, flags);
+
+	return mask_updated;
+}
+
+/**
+ * pci_check_and_mask_intx - mask INTx on pending interrupt
+ * @dev: the PCI device to operate on
+ *
+ * Check if the device dev has its INTx line asserted, mask it and
+ * return true in that case. False is returned if no interrupt was
+ * pending.
+ */
+bool pci_check_and_mask_intx(struct pci_dev *dev)
+{
+	return pci_check_and_set_intx_mask(dev, true);
+}
+EXPORT_SYMBOL_GPL(pci_check_and_mask_intx);
+
+/**
+ * pci_check_and_unmask_intx - unmask INTx if no interrupt is pending
+ * @dev: the PCI device to operate on
+ *
+ * Check if the device dev has its INTx line asserted, unmask it if not
+ * and return true. False is returned and the mask remains active if
+ * there was still an interrupt pending.
+ */
+bool pci_check_and_unmask_intx(struct pci_dev *dev)
+{
+	return pci_check_and_set_intx_mask(dev, false);
+}
+EXPORT_SYMBOL_GPL(pci_check_and_unmask_intx);
+
 /**
  * pci_msi_off - disables any msi or msix capabilities
  * @dev: the PCI device to operate on
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index b74084e9ca12..3b6e4ed306b6 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -136,6 +136,8 @@ static inline void pci_remove_legacy_files(struct pci_bus *bus) { return; }
 /* Lock for read/write access to pci device and bus lists */
 extern struct rw_semaphore pci_bus_sem;
 
+extern raw_spinlock_t pci_lock;
+
 extern unsigned int pci_pm_d3_delay;
 
 #ifdef CONFIG_PCI_MSI
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 72401596b2a8..4286b853956e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -804,6 +804,9 @@ int __must_check pci_set_mwi(struct pci_dev *dev);
 int pci_try_set_mwi(struct pci_dev *dev);
 void pci_clear_mwi(struct pci_dev *dev);
 void pci_intx(struct pci_dev *dev, int enable);
+bool pci_intx_mask_supported(struct pci_dev *dev);
+bool pci_check_and_mask_intx(struct pci_dev *dev);
+bool pci_check_and_unmask_intx(struct pci_dev *dev);
 void pci_msi_off(struct pci_dev *dev);
 int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size);
 int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask);
-- 
cgit v1.2.3


From cfce9fb808d7d25f6ea18a804eb71b08c7d777c1 Mon Sep 17 00:00:00 2001
From: Myron Stowe <mstowe@redhat.com>
Date: Fri, 28 Oct 2011 15:47:35 -0600
Subject: PCI: add declaration for pcibios_set_master() to pci core

Currently, pcibios_set_master() is implemented in architecture-
specific code.  There is nothing architecture-specific about PCI's
'latency timer'.

This patch adds a declaration for pcibios_set_master() to PCI's core
in preparation for pulling the function itself up into the core.
Without the addition of this declaration, subsequent patches that
remove inline definitions of pcibios_set_master() would be removing
the only declaration of such.

No functional change.

Signed-off-by: Myron Stowe <myron.stowe@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4286b853956e..569341d2d527 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1453,8 +1453,10 @@ extern u8 pci_cache_line_size;
 extern unsigned long pci_hotplug_io_size;
 extern unsigned long pci_hotplug_mem_size;
 
+/* Architecture specific versions may override these (weak) */
 int pcibios_add_platform_entries(struct pci_dev *dev);
 void pcibios_disable_device(struct pci_dev *dev);
+void pcibios_set_master(struct pci_dev *dev);
 int pcibios_set_pcie_reset_state(struct pci_dev *dev,
 				 enum pcie_reset_state state);
 
-- 
cgit v1.2.3


From 96c5590058d7fded14f43af2ab521436cecf3125 Mon Sep 17 00:00:00 2001
From: Myron Stowe <mstowe@redhat.com>
Date: Fri, 28 Oct 2011 15:48:38 -0600
Subject: PCI: Pull PCI 'latency timer' setup up into the core

The 'latency timer' of PCI devices, both Type 0 and Type 1,
is set up in architecture-specific code [see: 'pcibios_set_master()'].
There are two approaches being taken by all the architectures - check
if the 'latency timer' is currently set between 16 and 255 and if not
bring it within bounds, or, do nothing (and then there is the
gratuitously different PA-RISC implementation).

There is nothing architecture-specific about PCI's 'latency timer' so
this patch pulls its setup functionality up into the PCI core by
creating a generic 'pcibios_set_master()' function using the '__weak'
attribute which can be used by all architectures as a default which,
if necessary, can then be over-ridden by architecture-specific code.

No functional change.

Signed-off-by: Myron Stowe <myron.stowe@redhat.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/blackfin/include/asm/pci.h         |  4 ----
 arch/frv/mb93090-mb00/pci-frv.c         |  6 ------
 arch/frv/mb93090-mb00/pci-frv.h         |  2 --
 arch/h8300/include/asm/pci.h            |  5 -----
 arch/mips/pci/pci.c                     |  6 ------
 arch/mn10300/unit-asb2305/pci-asb2305.c |  6 ------
 arch/mn10300/unit-asb2305/pci-asb2305.h |  2 --
 arch/sh/drivers/pci/pci.c               |  6 ------
 arch/x86/include/asm/pci_x86.h          |  2 --
 arch/x86/pci/i386.c                     |  6 ------
 drivers/pci/pci.c                       | 29 +++++++++++++++++++++++++++++
 include/linux/pci.h                     |  3 +++
 12 files changed, 32 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/arch/blackfin/include/asm/pci.h b/arch/blackfin/include/asm/pci.h
index 99cae2e3bac7..74352c4597d9 100644
--- a/arch/blackfin/include/asm/pci.h
+++ b/arch/blackfin/include/asm/pci.h
@@ -10,10 +10,6 @@
 #define PCIBIOS_MIN_IO 0x00001000
 #define PCIBIOS_MIN_MEM 0x10000000
 
-static inline void pcibios_set_master(struct pci_dev *dev)
-{
-	/* No special bus mastering setup handling */
-}
 static inline void pcibios_penalize_isa_irq(int irq)
 {
 	/* We don't do dynamic PCI IRQ allocation */
diff --git a/arch/frv/mb93090-mb00/pci-frv.c b/arch/frv/mb93090-mb00/pci-frv.c
index 6b4fb28e9f99..6a0cd644d7cc 100644
--- a/arch/frv/mb93090-mb00/pci-frv.c
+++ b/arch/frv/mb93090-mb00/pci-frv.c
@@ -195,12 +195,6 @@ void __init pcibios_resource_survey(void)
 	pcibios_assign_resources();
 }
 
-/*
- *  If we set up a device for bus mastering, we need to check the latency
- *  timer as certain crappy BIOSes forget to set it properly.
- */
-unsigned int pcibios_max_latency = 255;
-
 void pcibios_set_master(struct pci_dev *dev)
 {
 	u8 lat;
diff --git a/arch/frv/mb93090-mb00/pci-frv.h b/arch/frv/mb93090-mb00/pci-frv.h
index f3fe55914793..089eeba4f3bc 100644
--- a/arch/frv/mb93090-mb00/pci-frv.h
+++ b/arch/frv/mb93090-mb00/pci-frv.h
@@ -26,8 +26,6 @@ extern unsigned int __nongpreldata pci_probe;
 
 /* pci-frv.c */
 
-extern unsigned int pcibios_max_latency;
-
 void pcibios_resource_survey(void);
 
 /* pci-vdk.c */
diff --git a/arch/h8300/include/asm/pci.h b/arch/h8300/include/asm/pci.h
index cc9762091c0a..0b2acaa3dd84 100644
--- a/arch/h8300/include/asm/pci.h
+++ b/arch/h8300/include/asm/pci.h
@@ -9,11 +9,6 @@
 
 #define pcibios_assign_all_busses()	0
 
-static inline void pcibios_set_master(struct pci_dev *dev)
-{
-	/* No special bus mastering setup handling */
-}
-
 static inline void pcibios_penalize_isa_irq(int irq, int active)
 {
 	/* We don't do dynamic PCI IRQ allocation */
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index 41af7fa2887b..f93f749b92e3 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -205,12 +205,6 @@ static int pcibios_enable_resources(struct pci_dev *dev, int mask)
 	return 0;
 }
 
-/*
- *  If we set up a device for bus mastering, we need to check the latency
- *  timer as certain crappy BIOSes forget to set it properly.
- */
-static unsigned int pcibios_max_latency = 255;
-
 void pcibios_set_master(struct pci_dev *dev)
 {
 	u8 lat;
diff --git a/arch/mn10300/unit-asb2305/pci-asb2305.c b/arch/mn10300/unit-asb2305/pci-asb2305.c
index 8e6763e6f250..2b299c413ae5 100644
--- a/arch/mn10300/unit-asb2305/pci-asb2305.c
+++ b/arch/mn10300/unit-asb2305/pci-asb2305.c
@@ -213,12 +213,6 @@ void __init pcibios_resource_survey(void)
 	pcibios_allocate_resources(1);
 }
 
-/*
- *  If we set up a device for bus mastering, we need to check the latency
- *  timer as certain crappy BIOSes forget to set it properly.
- */
-unsigned int pcibios_max_latency = 255;
-
 void pcibios_set_master(struct pci_dev *dev)
 {
 	u8 lat;
diff --git a/arch/mn10300/unit-asb2305/pci-asb2305.h b/arch/mn10300/unit-asb2305/pci-asb2305.h
index c3fa294b6e28..1194fe486b01 100644
--- a/arch/mn10300/unit-asb2305/pci-asb2305.h
+++ b/arch/mn10300/unit-asb2305/pci-asb2305.h
@@ -31,8 +31,6 @@ extern unsigned int pci_probe;
 
 /* pci-asb2305.c */
 
-extern unsigned int pcibios_max_latency;
-
 extern void pcibios_resource_survey(void);
 
 /* pci.c */
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index c2691afe8f79..cfdb2f652949 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -243,12 +243,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return pci_enable_resources(dev, mask);
 }
 
-/*
- *  If we set up a device for bus mastering, we need to check and set
- *  the latency timer as it may not be properly set.
- */
-static unsigned int pcibios_max_latency = 255;
-
 void pcibios_set_master(struct pci_dev *dev)
 {
 	u8 lat;
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index e38197806853..b3a531746026 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -44,8 +44,6 @@ enum pci_bf_sort_state {
 
 /* pci-i386.c */
 
-extern unsigned int pcibios_max_latency;
-
 void pcibios_resource_survey(void);
 void pcibios_set_cache_line_size(void);
 
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 794b092d01ae..dd5806b0fc8b 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -254,12 +254,6 @@ void __init pcibios_resource_survey(void)
  */
 fs_initcall(pcibios_assign_resources);
 
-/*
- *  If we set up a device for bus mastering, we need to check the latency
- *  timer as certain crappy BIOSes forget to set it properly.
- */
-unsigned int pcibios_max_latency = 255;
-
 void pcibios_set_master(struct pci_dev *dev)
 {
 	u8 lat;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 924193ef4fe1..f9abe84cf5e0 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -88,6 +88,12 @@ enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_TUNE_OFF;
 u8 pci_dfl_cache_line_size __devinitdata = L1_CACHE_BYTES >> 2;
 u8 pci_cache_line_size;
 
+/*
+ * If we set up a device for bus mastering, we need to check the latency
+ * timer as certain BIOSes forget to set it properly.
+ */
+unsigned int pcibios_max_latency = 255;
+
 /**
  * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
  * @bus: pointer to PCI bus structure to search
@@ -2595,6 +2601,29 @@ static void __pci_set_master(struct pci_dev *dev, bool enable)
 	dev->is_busmaster = enable;
 }
 
+/**
+ * pcibios_set_master - enable PCI bus-mastering for device dev
+ * @dev: the PCI device to enable
+ *
+ * Default bus-mastering setup: adjusts the device's latency timer if it
+ * is below 16 or above pcibios_max_latency.  Architecture specific
+ * implementations can override this if necessary.
+ */
+void __weak pcibios_set_master(struct pci_dev *dev)
+{
+	u8 lat;
+
+	pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+	if (lat < 16)
+		lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+	else if (lat > pcibios_max_latency)
+		lat = pcibios_max_latency;
+	else
+		return;
+	dev_printk(KERN_DEBUG, &dev->dev, "setting latency timer to %d\n", lat);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
 /**
  * pci_set_master - enables bus-mastering for device dev
  * @dev: the PCI device to enable
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 569341d2d527..4c16a5788998 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -795,8 +795,11 @@ static inline int pci_is_managed(struct pci_dev *pdev)
 }
 
 void pci_disable_device(struct pci_dev *dev);
+
+extern unsigned int pcibios_max_latency;
 void pci_set_master(struct pci_dev *dev);
 void pci_clear_master(struct pci_dev *dev);
+
 int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state);
 int pci_set_cacheline_size(struct pci_dev *dev);
 #define HAVE_PCI_SET_MWI
-- 
cgit v1.2.3


From 45ca9e9730c5acdb482dd95799fd8ac834481897 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 28 Oct 2011 16:25:35 -0600
Subject: PCI: add helpers for building PCI bus resource lists

We'd like to supply a list of resources when we create a new PCI bus,
e.g., the root bus under a PCI host bridge.  These are helpers for
constructing that list.

These are exported because the plan is to replace this exported interface:
    pci_scan_bus_parented()
with this one:
    pci_add_resource(resources, ...)
    pci_scan_root_bus(..., resources)

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/bus.c   | 32 +++++++++++++++++++++++++++-----
 include/linux/pci.h |  2 ++
 2 files changed, 29 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 1e2ad92a4752..398f5d859791 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -18,6 +18,32 @@
 
 #include "pci.h"
 
+void pci_add_resource(struct list_head *resources, struct resource *res)
+{
+	struct pci_bus_resource *bus_res;
+
+	bus_res = kzalloc(sizeof(struct pci_bus_resource), GFP_KERNEL);
+	if (!bus_res) {
+		printk(KERN_ERR "PCI: can't add bus resource %pR\n", res);
+		return;
+	}
+
+	bus_res->res = res;
+	list_add_tail(&bus_res->list, resources);
+}
+EXPORT_SYMBOL(pci_add_resource);
+
+void pci_free_resource_list(struct list_head *resources)
+{
+	struct pci_bus_resource *bus_res, *tmp;
+
+	list_for_each_entry_safe(bus_res, tmp, resources, list) {
+		list_del(&bus_res->list);
+		kfree(bus_res);
+	}
+}
+EXPORT_SYMBOL(pci_free_resource_list);
+
 void pci_bus_add_resource(struct pci_bus *bus, struct resource *res,
 			  unsigned int flags)
 {
@@ -52,16 +78,12 @@ EXPORT_SYMBOL_GPL(pci_bus_resource_n);
 
 void pci_bus_remove_resources(struct pci_bus *bus)
 {
-	struct pci_bus_resource *bus_res, *tmp;
 	int i;
 
 	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++)
 		bus->resource[i] = NULL;
 
-	list_for_each_entry_safe(bus_res, tmp, &bus->resources, list) {
-		list_del(&bus_res->list);
-		kfree(bus_res);
-	}
+	pci_free_resource_list(&bus->resources);
 }
 
 /**
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4c16a5788998..9daa79901122 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -917,6 +917,8 @@ int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *);
 void pci_release_selected_regions(struct pci_dev *, int);
 
 /* drivers/pci/bus.c */
+void pci_add_resource(struct list_head *resources, struct resource *res);
+void pci_free_resource_list(struct list_head *resources);
 void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, unsigned int flags);
 struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n);
 void pci_bus_remove_resources(struct pci_bus *bus);
-- 
cgit v1.2.3


From 166c6370754a0a92386e2ffb0eeb06e50ac8588d Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 28 Oct 2011 16:25:45 -0600
Subject: PCI: add pci_create_root_bus() that accepts resource list

pci_create_bus() assigns ioport_resource and iomem_resource as the default
bus resources, i.e., the entire address space.  Architectures fix these
later, typically in pcibios_fixup_bus() or after pci_scan_bus_parented()
returns, but code that runs in the interim sees incorrect resource
information.

This patch adds a new pci_create_root_bus() that sets the bus resources
correctly from a supplied list of resources.

I intend to remove pci_create_bus() after changing all callers.

Based on original patch by Deng-Cheng Zhu.

Reference: http://www.spinics.net/lists/mips/msg41654.html
Reference: https://lkml.org/lkml/2011/8/26/88
Signed-off-by: Deng-Cheng Zhu <dczhu@mips.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 25 +++++++++++++++++++++----
 include/linux/pci.h |  3 +++
 2 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 2f0b14451d9d..89ecded0581b 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1522,12 +1522,13 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus)
 	return max;
 }
 
-struct pci_bus * pci_create_bus(struct device *parent,
-		int bus, struct pci_ops *ops, void *sysdata)
+struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
+		struct pci_ops *ops, void *sysdata, struct list_head *resources)
 {
 	int error, i;
 	struct pci_bus *b, *b2;
 	struct device *dev;
+	struct pci_bus_resource *bus_res, *n;
 	struct resource *res;
 
 	b = pci_alloc_bus();
@@ -1578,8 +1579,10 @@ struct pci_bus * pci_create_bus(struct device *parent,
 	pci_create_legacy_files(b);
 
 	b->number = b->secondary = bus;
-	b->resource[0] = &ioport_resource;
-	b->resource[1] = &iomem_resource;
+
+	/* Add initial resources to the bus */
+	list_for_each_entry_safe(bus_res, n, resources, list)
+		list_move_tail(&bus_res->list, &b->resources);
 
 	if (parent)
 		dev_info(parent, "PCI host bridge to bus %s\n", dev_name(&b->dev));
@@ -1605,6 +1608,20 @@ err_out:
 	return NULL;
 }
 
+struct pci_bus *pci_create_bus(struct device *parent,
+		int bus, struct pci_ops *ops, void *sysdata)
+{
+	LIST_HEAD(resources);
+	struct pci_bus *b;
+
+	pci_add_resource(&resources, &ioport_resource);
+	pci_add_resource(&resources, &iomem_resource);
+	b = pci_create_root_bus(parent, bus, ops, sysdata, &resources);
+	if (!b)
+		pci_free_resource_list(&resources);
+	return b;
+}
+
 struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent,
 		int bus, struct pci_ops *ops, void *sysdata)
 {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9daa79901122..eacb1e51e11b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -670,6 +670,9 @@ static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *o
 		pci_bus_add_devices(root_bus);
 	return root_bus;
 }
+struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
+				    struct pci_ops *ops, void *sysdata,
+				    struct list_head *resources);
 struct pci_bus *pci_create_bus(struct device *parent, int bus,
 			       struct pci_ops *ops, void *sysdata);
 struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
-- 
cgit v1.2.3


From a2ebb827958a4ab3577443f89037f229683c644a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 28 Oct 2011 16:25:50 -0600
Subject: PCI: add pci_scan_root_bus() that accepts resource list

"Early" and "header" quirks often use incorrect bus resources because they
see the default resources assigned by pci_create_bus(), before the
architecture fixes them up (typically in pcibios_fixup_bus()).  Regions
reserved by these quirks end up with the wrong parents.

Here's the standard path for scanning a PCI root bus:

  pci_scan_bus or pci_scan_bus_parented
    pci_create_bus                     <-- A create with default resources
    pci_scan_child_bus
      pci_scan_slot
        pci_scan_single_device
          pci_scan_device
            pci_setup_device
              pci_fixup_device(early)  <-- B
          pci_device_add
            pci_fixup_device(header)   <-- C
      pcibios_fixup_bus                <-- D fill in correct resources

Early and header quirks at B and C use the default (incorrect) root bus
resources rather than those filled in at D.

This patch adds a new pci_scan_root_bus() function that sets the bus
resources correctly from a supplied list of resources.

I intend to remove pci_scan_bus() and pci_scan_bus_parented() after
fixing all callers.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 15 +++++++++++++++
 include/linux/pci.h |  3 +++
 2 files changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 89ecded0581b..7fc7e14118cc 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1608,6 +1608,21 @@ err_out:
 	return NULL;
 }
 
+struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus,
+		struct pci_ops *ops, void *sysdata, struct list_head *resources)
+{
+	struct pci_bus *b;
+
+	b = pci_create_root_bus(parent, bus, ops, sysdata, resources);
+	if (!b)
+		return NULL;
+
+	b->subordinate = pci_scan_child_bus(b);
+	pci_bus_add_devices(b);
+	return b;
+}
+EXPORT_SYMBOL(pci_scan_root_bus);
+
 struct pci_bus *pci_create_bus(struct device *parent,
 		int bus, struct pci_ops *ops, void *sysdata)
 {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index eacb1e51e11b..5102d74f6bfc 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -673,6 +673,9 @@ static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *o
 struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 				    struct pci_ops *ops, void *sysdata,
 				    struct list_head *resources);
+struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus,
+					     struct pci_ops *ops, void *sysdata,
+					     struct list_head *resources);
 struct pci_bus *pci_create_bus(struct device *parent, int bus,
 			       struct pci_ops *ops, void *sysdata);
 struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
-- 
cgit v1.2.3


From de4b2f76d69673cea08be952dcb4df2f4c81c6e3 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 28 Oct 2011 16:25:55 -0600
Subject: PCI: convert pci_scan_bus() to use pci_create_root_bus()

I plan to deprecate pci_scan_bus_parented(), so use pci_create_root_bus()
directly instead.  pci_scan_bus() itself will be removed as soon as all
callers are gone, so this is just an interim step.

v2: export pci_scan_bus

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 19 +++++++++++++++++++
 include/linux/pci.h | 10 +---------
 2 files changed, 20 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 7fc7e14118cc..d09644b52d1c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1649,6 +1649,25 @@ struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent,
 }
 EXPORT_SYMBOL(pci_scan_bus_parented);
 
+struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops,
+					void *sysdata)
+{
+	LIST_HEAD(resources);
+	struct pci_bus *b;
+
+	pci_add_resource(&resources, &ioport_resource);
+	pci_add_resource(&resources, &iomem_resource);
+	b = pci_create_root_bus(NULL, bus, ops, sysdata, &resources);
+	if (b) {
+		b->subordinate = pci_scan_child_bus(b);
+		pci_bus_add_devices(b);
+	} else {
+		pci_free_resource_list(&resources);
+	}
+	return b;
+}
+EXPORT_SYMBOL(pci_scan_bus);
+
 #ifdef CONFIG_HOTPLUG
 /**
  * pci_rescan_bus - scan a PCI bus for devices.
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5102d74f6bfc..ff280e08690f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -661,15 +661,7 @@ extern struct pci_bus *pci_find_bus(int domain, int busnr);
 void pci_bus_add_devices(const struct pci_bus *bus);
 struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus,
 				      struct pci_ops *ops, void *sysdata);
-static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops,
-					   void *sysdata)
-{
-	struct pci_bus *root_bus;
-	root_bus = pci_scan_bus_parented(NULL, bus, ops, sysdata);
-	if (root_bus)
-		pci_bus_add_devices(root_bus);
-	return root_bus;
-}
+struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata);
 struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 				    struct pci_ops *ops, void *sysdata,
 				    struct list_head *resources);
-- 
cgit v1.2.3


From 118faafaf987f521832843d36c6be580983f9a6b Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 28 Oct 2011 16:28:24 -0600
Subject: PCI: remove pci_create_bus()

All users of pci_create_bus() have been converted to pci_create_root_bus(),
so remove it.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 drivers/pci/probe.c | 14 --------------
 include/linux/pci.h |  2 --
 2 files changed, 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 944ea2170f48..7cc9e2f0f47c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1623,20 +1623,6 @@ struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus,
 }
 EXPORT_SYMBOL(pci_scan_root_bus);
 
-struct pci_bus *pci_create_bus(struct device *parent,
-		int bus, struct pci_ops *ops, void *sysdata)
-{
-	LIST_HEAD(resources);
-	struct pci_bus *b;
-
-	pci_add_resource(&resources, &ioport_resource);
-	pci_add_resource(&resources, &iomem_resource);
-	b = pci_create_root_bus(parent, bus, ops, sysdata, &resources);
-	if (!b)
-		pci_free_resource_list(&resources);
-	return b;
-}
-
 /* Deprecated; use pci_scan_root_bus() instead */
 struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent,
 		int bus, struct pci_ops *ops, void *sysdata)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ff280e08690f..174fe8aabdde 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -668,8 +668,6 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 struct pci_bus * __devinit pci_scan_root_bus(struct device *parent, int bus,
 					     struct pci_ops *ops, void *sysdata,
 					     struct list_head *resources);
-struct pci_bus *pci_create_bus(struct device *parent, int bus,
-			       struct pci_ops *ops, void *sysdata);
 struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
 				int busnr);
 void pcie_update_link_speed(struct pci_bus *bus, u16 link_status);
-- 
cgit v1.2.3


From aecab53f45b84fbc7d6848957f9d83e1c3548b17 Mon Sep 17 00:00:00 2001
From: Alessandro Rubini <rubini@gnudd.com>
Date: Fri, 6 Jan 2012 13:32:41 +0100
Subject: PCI: pci_ids: add device ids for STA2X11 device (aka ConneXT)

The chip is an I/O hub used by some Atom boards.  Most of those
symbols are used in arch/x86/platform/sta2x11/sta2x11.c (to be
introduced) and in the specific drivers as well.

Signed-off-by: Alessandro Rubini <rubini@gnudd.com>
Acked-by: Giancarlo Asnaghi <giancarlo.asnaghi@st.com>
Cc: Alan Cox <alan@linux.intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci_ids.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2aaee0ca9da8..31d77af2ef42 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -776,6 +776,29 @@
 #define PCI_DEVICE_ID_ELSA_QS3000	0x3000
 
 #define PCI_VENDOR_ID_STMICRO		0x104A
+#define PCI_DEVICE_ID_STMICRO_USB_HOST	0xCC00
+#define PCI_DEVICE_ID_STMICRO_USB_OHCI	0xCC01
+#define PCI_DEVICE_ID_STMICRO_USB_OTG	0xCC02
+#define PCI_DEVICE_ID_STMICRO_UART_HWFC 0xCC03
+#define PCI_DEVICE_ID_STMICRO_UART_NO_HWFC	0xCC04
+#define PCI_DEVICE_ID_STMICRO_SOC_DMA	0xCC05
+#define PCI_DEVICE_ID_STMICRO_SATA	0xCC06
+#define PCI_DEVICE_ID_STMICRO_I2C	0xCC07
+#define PCI_DEVICE_ID_STMICRO_SPI_HS	0xCC08
+#define PCI_DEVICE_ID_STMICRO_MAC	0xCC09
+#define PCI_DEVICE_ID_STMICRO_SDIO_EMMC 0xCC0A
+#define PCI_DEVICE_ID_STMICRO_SDIO	0xCC0B
+#define PCI_DEVICE_ID_STMICRO_GPIO	0xCC0C
+#define PCI_DEVICE_ID_STMICRO_VIP	0xCC0D
+#define PCI_DEVICE_ID_STMICRO_AUDIO_ROUTER_DMA	0xCC0E
+#define PCI_DEVICE_ID_STMICRO_AUDIO_ROUTER_SRCS 0xCC0F
+#define PCI_DEVICE_ID_STMICRO_AUDIO_ROUTER_MSPS 0xCC10
+#define PCI_DEVICE_ID_STMICRO_CAN	0xCC11
+#define PCI_DEVICE_ID_STMICRO_MLB	0xCC12
+#define PCI_DEVICE_ID_STMICRO_DBP	0xCC13
+#define PCI_DEVICE_ID_STMICRO_SATA_PHY	0xCC14
+#define PCI_DEVICE_ID_STMICRO_ESRAM	0xCC15
+#define PCI_DEVICE_ID_STMICRO_VIC	0xCC16
 
 #define PCI_VENDOR_ID_BUSLOGIC		      0x104B
 #define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC 0x0140
-- 
cgit v1.2.3


From cda57bf9348fdbf4b8a830d6f9eb7da81df2f486 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 4 Jan 2012 15:49:45 -0800
Subject: PCI: DEVICE_COUNT_RESOURCE should be equal to PCI_NUM_RESOURCES

Save some bytes for device resource array.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 174fe8aabdde..5d06e340da9a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -111,7 +111,7 @@ enum {
 	PCI_NUM_RESOURCES,
 
 	/* preserve this for compatibility */
-	DEVICE_COUNT_RESOURCE
+	DEVICE_COUNT_RESOURCE = PCI_NUM_RESOURCES,
 };
 
 typedef int __bitwise pci_power_t;
-- 
cgit v1.2.3


From fb7ebfe4108e2cdfa2bb88e57148087717463dfa Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 4 Jan 2012 15:50:02 -0800
Subject: PCI: Increase resource array mask bit size in pcim_iomap_regions()

DEVICE_COUNT_RESOURCE will be bigger than 16 when SRIOV support is enabled.

Let them pass with int just like pci_enable_resources().

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 include/linux/pci.h | 6 +++---
 lib/devres.c        | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5d06e340da9a..a16b1df3deff 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1432,10 +1432,10 @@ static inline void pci_fixup_device(enum pci_fixup_pass pass,
 void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen);
 void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr);
 void __iomem * const *pcim_iomap_table(struct pci_dev *pdev);
-int pcim_iomap_regions(struct pci_dev *pdev, u16 mask, const char *name);
-int pcim_iomap_regions_request_all(struct pci_dev *pdev, u16 mask,
+int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name);
+int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask,
 				   const char *name);
-void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask);
+void pcim_iounmap_regions(struct pci_dev *pdev, int mask);
 
 extern int pci_pci_problems;
 #define PCIPCI_FAIL		1	/* No PCI PCI DMA */
diff --git a/lib/devres.c b/lib/devres.c
index 7c0e953a7486..2c851bd903bd 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -253,7 +253,7 @@ EXPORT_SYMBOL(pcim_iounmap);
  *
  * Request and iomap regions specified by @mask.
  */
-int pcim_iomap_regions(struct pci_dev *pdev, u16 mask, const char *name)
+int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name)
 {
 	void __iomem * const *iomap;
 	int i, rc;
@@ -306,7 +306,7 @@ EXPORT_SYMBOL(pcim_iomap_regions);
  *
  * Request all PCI BARs and iomap regions specified by @mask.
  */
-int pcim_iomap_regions_request_all(struct pci_dev *pdev, u16 mask,
+int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask,
 				   const char *name)
 {
 	int request_mask = ((1 << 6) - 1) & ~mask;
@@ -330,7 +330,7 @@ EXPORT_SYMBOL(pcim_iomap_regions_request_all);
  *
  * Unmap and release regions specified by @mask.
  */
-void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask)
+void pcim_iounmap_regions(struct pci_dev *pdev, int mask)
 {
 	void __iomem * const *iomap;
 	int i;
-- 
cgit v1.2.3


From 6c06108be53ca5e94d8b0e93883d534dd9079646 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 5 Jan 2012 02:27:57 -0300
Subject: [media] V4L/DVB: v4l2-ioctl: integer overflow in video_usercopy()

If ctrls->count is too high the multiplication could overflow and
array_size would be lower than expected.  Mauro and Hans Verkuil
suggested that we cap it at 1024.  That comes from the maximum
number of controls with lots of room for expansion.

$ grep V4L2_CID include/linux/videodev2.h | wc -l
211

Cc: stable <stable@vger.kernel.org>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-ioctl.c | 4 ++++
 include/linux/videodev2.h        | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index e1da8fc9dd2f..639abeee3392 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -2226,6 +2226,10 @@ static int check_array_args(unsigned int cmd, void *parg, size_t *array_size,
 		struct v4l2_ext_controls *ctrls = parg;
 
 		if (ctrls->count != 0) {
+			if (ctrls->count > V4L2_CID_MAX_CTRLS) {
+				ret = -EINVAL;
+				break;
+			}
 			*user_ptr = (void __user *)ctrls->controls;
 			*kernel_ptr = (void *)&ctrls->controls;
 			*array_size = sizeof(struct v4l2_ext_control)
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 6bfaa767a817..b2e1331ca76b 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1132,6 +1132,7 @@ struct v4l2_querymenu {
 #define V4L2_CTRL_FLAG_NEXT_CTRL	0x80000000
 
 /*  User-class control IDs defined by V4L2 */
+#define V4L2_CID_MAX_CTRLS		1024
 #define V4L2_CID_BASE			(V4L2_CTRL_CLASS_USER | 0x900)
 #define V4L2_CID_USER_BASE 		V4L2_CID_BASE
 /*  IDs reserved for driver specific controls */
-- 
cgit v1.2.3


From 6926afd1925a54a13684ebe05987868890665e2b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 7 Jan 2012 13:22:46 -0500
Subject: NFSv4: Save the owner/group name string when doing open

...so that we can do the uid/gid mapping outside the asynchronous RPC
context.
This fixes a bug in the current NFSv4 atomic open code where the client
isn't able to determine what the true uid/gid fields of the file are,
(because the asynchronous nature of the OPEN call denies it the ability
to do an upcall) and so fills them with default values, marking the
inode as needing revalidation.
Unfortunately, in some cases, the VFS will do some additional sanity
checks on the file, and may override the server's decision to allow
the open because it sees the wrong owner/group fields.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/idmap.c            |  83 ++++++++++++++++++++++++++++++++++++
 fs/nfs/inode.c            |   2 +
 fs/nfs/nfs4proc.c         |  10 +++++
 fs/nfs/nfs4xdr.c          | 106 ++++++++++++++++++++--------------------------
 include/linux/nfs_idmap.h |   8 ++++
 include/linux/nfs_xdr.h   |  17 +++++---
 6 files changed, 162 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 47d1c6ff2d8e..2c05f1991e1e 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -38,6 +38,89 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/nfs_idmap.h>
+#include <linux/nfs_fs.h>
+
+/**
+ * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
+ * @fattr: fully initialised struct nfs_fattr
+ * @owner_name: owner name string cache
+ * @group_name: group name string cache
+ */
+void nfs_fattr_init_names(struct nfs_fattr *fattr,
+		struct nfs4_string *owner_name,
+		struct nfs4_string *group_name)
+{
+	fattr->owner_name = owner_name;
+	fattr->group_name = group_name;
+}
+
+static void nfs_fattr_free_owner_name(struct nfs_fattr *fattr)
+{
+	fattr->valid &= ~NFS_ATTR_FATTR_OWNER_NAME;
+	kfree(fattr->owner_name->data);
+}
+
+static void nfs_fattr_free_group_name(struct nfs_fattr *fattr)
+{
+	fattr->valid &= ~NFS_ATTR_FATTR_GROUP_NAME;
+	kfree(fattr->group_name->data);
+}
+
+static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+	struct nfs4_string *owner = fattr->owner_name;
+	__u32 uid;
+
+	if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME))
+		return false;
+	if (nfs_map_name_to_uid(server, owner->data, owner->len, &uid) == 0) {
+		fattr->uid = uid;
+		fattr->valid |= NFS_ATTR_FATTR_OWNER;
+	}
+	return true;
+}
+
+static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+	struct nfs4_string *group = fattr->group_name;
+	__u32 gid;
+
+	if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME))
+		return false;
+	if (nfs_map_group_to_gid(server, group->data, group->len, &gid) == 0) {
+		fattr->gid = gid;
+		fattr->valid |= NFS_ATTR_FATTR_GROUP;
+	}
+	return true;
+}
+
+/**
+ * nfs_fattr_free_names - free up the NFSv4 owner and group strings
+ * @fattr: a fully initialised nfs_fattr structure
+ */
+void nfs_fattr_free_names(struct nfs_fattr *fattr)
+{
+	if (fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)
+		nfs_fattr_free_owner_name(fattr);
+	if (fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)
+		nfs_fattr_free_group_name(fattr);
+}
+
+/**
+ * nfs_fattr_map_and_free_names - map owner/group strings into uid/gid and free
+ * @server: pointer to the filesystem nfs_server structure
+ * @fattr: a fully initialised nfs_fattr structure
+ *
+ * This helper maps the cached NFSv4 owner/group strings in fattr into
+ * their numeric uid/gid equivalents, and then frees the cached strings.
+ */
+void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+	if (nfs_fattr_map_owner_name(server, fattr))
+		nfs_fattr_free_owner_name(fattr);
+	if (nfs_fattr_map_group_name(server, fattr))
+		nfs_fattr_free_group_name(fattr);
+}
 
 static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
 {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 50a15fa8cf98..f59cab12a8ee 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1019,6 +1019,8 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
 	fattr->valid = 0;
 	fattr->time_start = jiffies;
 	fattr->gencount = nfs_inc_attr_generation_counter();
+	fattr->owner_name = NULL;
+	fattr->group_name = NULL;
 }
 
 struct nfs_fattr *nfs_alloc_fattr(void)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3b1080118452..df3d3068242e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -52,6 +52,7 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/module.h>
+#include <linux/nfs_idmap.h>
 #include <linux/sunrpc/bc_xprt.h>
 #include <linux/xattr.h>
 #include <linux/utsname.h>
@@ -760,6 +761,8 @@ struct nfs4_opendata {
 	struct nfs_openres o_res;
 	struct nfs_open_confirmargs c_arg;
 	struct nfs_open_confirmres c_res;
+	struct nfs4_string owner_name;
+	struct nfs4_string group_name;
 	struct nfs_fattr f_attr;
 	struct nfs_fattr dir_attr;
 	struct dentry *dir;
@@ -783,6 +786,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
 	p->o_res.server = p->o_arg.server;
 	nfs_fattr_init(&p->f_attr);
 	nfs_fattr_init(&p->dir_attr);
+	nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);
 }
 
 static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
@@ -814,6 +818,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	p->o_arg.name = &dentry->d_name;
 	p->o_arg.server = server;
 	p->o_arg.bitmask = server->attr_bitmask;
+	p->o_arg.dir_bitmask = server->cache_consistency_bitmask;
 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
 	if (flags & O_CREAT) {
 		u32 *s;
@@ -850,6 +855,7 @@ static void nfs4_opendata_free(struct kref *kref)
 	dput(p->dir);
 	dput(p->dentry);
 	nfs_sb_deactive(sb);
+	nfs_fattr_free_names(&p->f_attr);
 	kfree(p);
 }
 
@@ -1574,6 +1580,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
 	if (status != 0 || !data->rpc_done)
 		return status;
 
+	nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr);
+
 	nfs_refresh_inode(dir, o_res->dir_attr);
 
 	if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
@@ -1606,6 +1614,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		return status;
 	}
 
+	nfs_fattr_map_and_free_names(server, &data->f_attr);
+
 	if (o_arg->open_flags & O_CREAT) {
 		update_changeattr(dir, &o_res->cinfo);
 		nfs_post_op_update_inode(dir, o_res->dir_attr);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index dcaf69309d8e..95e92e438407 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2298,7 +2298,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_getfh(xdr, &hdr);
 	encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_restorefh(xdr, &hdr);
-	encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_getfattr(xdr, args->dir_bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -3792,7 +3792,8 @@ out_overflow:
 }
 
 static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
-		const struct nfs_server *server, uint32_t *uid, int may_sleep)
+		const struct nfs_server *server, uint32_t *uid,
+		struct nfs4_string *owner_name)
 {
 	uint32_t len;
 	__be32 *p;
@@ -3809,8 +3810,12 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
 		p = xdr_inline_decode(xdr, len);
 		if (unlikely(!p))
 			goto out_overflow;
-		if (!may_sleep) {
-			/* do nothing */
+		if (owner_name != NULL) {
+			owner_name->data = kmemdup(p, len, GFP_NOWAIT);
+			if (owner_name->data != NULL) {
+				owner_name->len = len;
+				ret = NFS_ATTR_FATTR_OWNER_NAME;
+			}
 		} else if (len < XDR_MAX_NETOBJ) {
 			if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0)
 				ret = NFS_ATTR_FATTR_OWNER;
@@ -3830,7 +3835,8 @@ out_overflow:
 }
 
 static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
-		const struct nfs_server *server, uint32_t *gid, int may_sleep)
+		const struct nfs_server *server, uint32_t *gid,
+		struct nfs4_string *group_name)
 {
 	uint32_t len;
 	__be32 *p;
@@ -3847,8 +3853,12 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
 		p = xdr_inline_decode(xdr, len);
 		if (unlikely(!p))
 			goto out_overflow;
-		if (!may_sleep) {
-			/* do nothing */
+		if (group_name != NULL) {
+			group_name->data = kmemdup(p, len, GFP_NOWAIT);
+			if (group_name->data != NULL) {
+				group_name->len = len;
+				ret = NFS_ATTR_FATTR_GROUP_NAME;
+			}
 		} else if (len < XDR_MAX_NETOBJ) {
 			if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0)
 				ret = NFS_ATTR_FATTR_GROUP;
@@ -4285,7 +4295,7 @@ xdr_error:
 
 static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
 		struct nfs_fattr *fattr, struct nfs_fh *fh,
-		const struct nfs_server *server, int may_sleep)
+		const struct nfs_server *server)
 {
 	int status;
 	umode_t fmode = 0;
@@ -4352,12 +4362,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
 		goto xdr_error;
 	fattr->valid |= status;
 
-	status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, may_sleep);
+	status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, fattr->owner_name);
 	if (status < 0)
 		goto xdr_error;
 	fattr->valid |= status;
 
-	status = decode_attr_group(xdr, bitmap, server, &fattr->gid, may_sleep);
+	status = decode_attr_group(xdr, bitmap, server, &fattr->gid, fattr->group_name);
 	if (status < 0)
 		goto xdr_error;
 	fattr->valid |= status;
@@ -4398,7 +4408,7 @@ xdr_error:
 }
 
 static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr,
-		struct nfs_fh *fh, const struct nfs_server *server, int may_sleep)
+		struct nfs_fh *fh, const struct nfs_server *server)
 {
 	__be32 *savep;
 	uint32_t attrlen,
@@ -4417,7 +4427,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat
 	if (status < 0)
 		goto xdr_error;
 
-	status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server, may_sleep);
+	status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server);
 	if (status < 0)
 		goto xdr_error;
 
@@ -4428,9 +4438,9 @@ xdr_error:
 }
 
 static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
-		const struct nfs_server *server, int may_sleep)
+		const struct nfs_server *server)
 {
-	return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep);
+	return decode_getfattr_generic(xdr, fattr, NULL, server);
 }
 
 /*
@@ -5711,8 +5721,7 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
 	status = decode_open_downgrade(xdr, res);
 	if (status != 0)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -5738,8 +5747,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_access(xdr, res);
 	if (status != 0)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -5768,8 +5776,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_getfh(xdr, res->fh);
 	if (status)
 		goto out;
-	status = decode_getfattr(xdr, res->fattr, res->server
-			,!RPC_IS_ASYNC(rqstp->rq_task));
+	status = decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -5795,8 +5802,7 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
 		goto out;
 	status = decode_getfh(xdr, res->fh);
 	if (status == 0)
-		status = decode_getfattr(xdr, res->fattr, res->server,
-				!RPC_IS_ASYNC(rqstp->rq_task));
+		status = decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -5822,8 +5828,7 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_remove(xdr, &res->cinfo);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->dir_attr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->dir_attr, res->server);
 out:
 	return status;
 }
@@ -5856,14 +5861,12 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	if (status)
 		goto out;
 	/* Current FH is target directory */
-	if (decode_getfattr(xdr, res->new_fattr, res->server,
-				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
+	if (decode_getfattr(xdr, res->new_fattr, res->server))
 		goto out;
 	status = decode_restorefh(xdr);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->old_fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->old_fattr, res->server);
 out:
 	return status;
 }
@@ -5899,14 +5902,12 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	 * Note order: OP_LINK leaves the directory as the current
 	 *             filehandle.
 	 */
-	if (decode_getfattr(xdr, res->dir_attr, res->server,
-				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
+	if (decode_getfattr(xdr, res->dir_attr, res->server))
 		goto out;
 	status = decode_restorefh(xdr);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -5938,14 +5939,12 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_getfh(xdr, res->fh);
 	if (status)
 		goto out;
-	if (decode_getfattr(xdr, res->fattr, res->server,
-				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
+	if (decode_getfattr(xdr, res->fattr, res->server))
 		goto out;
 	status = decode_restorefh(xdr);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->dir_fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->dir_fattr, res->server);
 out:
 	return status;
 }
@@ -5977,8 +5976,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	status = decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -6076,8 +6074,7 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	 * 	an ESTALE error. Shouldn't be a problem,
 	 * 	though, since fattr->valid will remain unset.
 	 */
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -6108,13 +6105,11 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		goto out;
 	if (decode_getfh(xdr, &res->fh) != 0)
 		goto out;
-	if (decode_getfattr(xdr, res->f_attr, res->server,
-				!RPC_IS_ASYNC(rqstp->rq_task)) != 0)
+	if (decode_getfattr(xdr, res->f_attr, res->server) != 0)
 		goto out;
 	if (decode_restorefh(xdr) != 0)
 		goto out;
-	decode_getfattr(xdr, res->dir_attr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->dir_attr, res->server);
 out:
 	return status;
 }
@@ -6162,8 +6157,7 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
 	status = decode_open(xdr, res);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->f_attr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->f_attr, res->server);
 out:
 	return status;
 }
@@ -6190,8 +6184,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
 	status = decode_setattr(xdr);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -6371,8 +6364,7 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	if (status)
 		goto out;
 	if (res->fattr)
-		decode_getfattr(xdr, res->fattr, res->server,
-				!RPC_IS_ASYNC(rqstp->rq_task));
+		decode_getfattr(xdr, res->fattr, res->server);
 	if (!status)
 		status = res->count;
 out:
@@ -6401,8 +6393,7 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	if (status)
 		goto out;
 	if (res->fattr)
-		decode_getfattr(xdr, res->fattr, res->server,
-				!RPC_IS_ASYNC(rqstp->rq_task));
+		decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -6561,8 +6552,7 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
 	status = decode_delegreturn(xdr);
 	if (status != 0)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -6591,8 +6581,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
 		goto out;
 	xdr_enter_page(xdr, PAGE_SIZE);
 	status = decode_getfattr(xdr, &res->fs_locations->fattr,
-				 res->fs_locations->server,
-				 !RPC_IS_ASYNC(req->rq_task));
+				 res->fs_locations->server);
 out:
 	return status;
 }
@@ -6841,8 +6830,7 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
 	status = decode_layoutcommit(xdr, rqstp, res);
 	if (status)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server,
-			!RPC_IS_ASYNC(rqstp->rq_task));
+	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -6973,7 +6961,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 		goto out_overflow;
 
 	if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
-					entry->server, 1) < 0)
+					entry->server) < 0)
 		goto out_overflow;
 	if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
 		entry->ino = entry->fattr->mounted_on_fileid;
diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h
index ae7d6a380dae..308c18877018 100644
--- a/include/linux/nfs_idmap.h
+++ b/include/linux/nfs_idmap.h
@@ -66,6 +66,8 @@ struct idmap_msg {
 /* Forward declaration to make this header independent of others */
 struct nfs_client;
 struct nfs_server;
+struct nfs_fattr;
+struct nfs4_string;
 
 #ifdef CONFIG_NFS_USE_NEW_IDMAPPER
 
@@ -97,6 +99,12 @@ void nfs_idmap_delete(struct nfs_client *);
 
 #endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
 
+void nfs_fattr_init_names(struct nfs_fattr *fattr,
+		struct nfs4_string *owner_name,
+		struct nfs4_string *group_name);
+void nfs_fattr_free_names(struct nfs_fattr *);
+void nfs_fattr_map_and_free_names(struct nfs_server *, struct nfs_fattr *);
+
 int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, __u32 *);
 int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, __u32 *);
 int nfs_map_uid_to_name(const struct nfs_server *, __u32, char *, size_t);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6c898afe6095..a764cef06b73 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -18,6 +18,11 @@
 /* Forward declaration for NFS v3 */
 struct nfs4_secinfo_flavors;
 
+struct nfs4_string {
+	unsigned int len;
+	char *data;
+};
+
 struct nfs_fsid {
 	uint64_t		major;
 	uint64_t		minor;
@@ -61,6 +66,8 @@ struct nfs_fattr {
 	struct timespec		pre_ctime;	/* pre_op_attr.ctime	  */
 	unsigned long		time_start;
 	unsigned long		gencount;
+	struct nfs4_string	*owner_name;
+	struct nfs4_string	*group_name;
 };
 
 #define NFS_ATTR_FATTR_TYPE		(1U << 0)
@@ -85,6 +92,8 @@ struct nfs_fattr {
 #define NFS_ATTR_FATTR_V4_REFERRAL	(1U << 19)	/* NFSv4 referral */
 #define NFS_ATTR_FATTR_MOUNTPOINT	(1U << 20)	/* Treat as mountpoint */
 #define NFS_ATTR_FATTR_MOUNTED_ON_FILEID		(1U << 21)
+#define NFS_ATTR_FATTR_OWNER_NAME	(1U << 22)
+#define NFS_ATTR_FATTR_GROUP_NAME	(1U << 23)
 
 #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \
 		| NFS_ATTR_FATTR_MODE \
@@ -324,6 +333,7 @@ struct nfs_openargs {
 	const struct qstr *	name;
 	const struct nfs_server *server;	 /* Needed for ID mapping */
 	const u32 *		bitmask;
+	const u32 *		dir_bitmask;
 	__u32			claim;
 	struct nfs4_sequence_args	seq_args;
 };
@@ -342,6 +352,8 @@ struct nfs_openres {
 	__u32			do_recall;
 	__u64			maxsize;
 	__u32			attrset[NFS4_BITMAP_SIZE];
+	struct nfs4_string	*owner;
+	struct nfs4_string	*group_owner;
 	struct nfs4_sequence_res	seq_res;
 };
 
@@ -778,11 +790,6 @@ struct nfs3_getaclres {
 	struct posix_acl *	acl_default;
 };
 
-struct nfs4_string {
-	unsigned int len;
-	char *data;
-};
-
 #ifdef CONFIG_NFS_V4
 
 typedef u64 clientid4;
-- 
cgit v1.2.3


From 96de37b62ca525cd77d2e85aea1472846ee31c4d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Sat, 7 Jan 2012 17:26:49 -0500
Subject: tracing: Fix compile error when static ftrace is enabled

The stack tracer calls the ftrace_set_early_filter() function
to allow the stack tracer to pick its own functions on boot.
But this function is not defined if dynamic ftrace is not set.
This causes a compiler error when stack tracer is enabled and
dynamic ftrace is not.

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 41df6f501656..028e26f0bf08 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -339,6 +339,7 @@ static inline int ftrace_text_reserved(void *start, void *end)
  * functions may still be called. Use a macro instead of inline.
  */
 #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
+#define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
 
 static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
 			    size_t cnt, loff_t *ppos) { return -ENODEV; }
-- 
cgit v1.2.3


From bc31b86a5923fad5f3fbb6192f767f410241ba27 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Sat, 7 Jan 2012 20:41:55 -0600
Subject: writeback: move MIN_WRITEBACK_PAGES to fs-writeback.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix compile error

 fs/fs-writeback.c:515:33: error: ‘PAGE_CACHE_SHIFT’ undeclared (first use in this function)

Reported-by: Randy Dunlap <rdunlap@xenotime.net>
Acked-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 fs/fs-writeback.c         | 6 ++++++
 include/linux/writeback.h | 5 -----
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 92d353e069dc..22e2d42742a9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -20,6 +20,7 @@
 #include <linux/sched.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/pagemap.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/writeback.h>
@@ -29,6 +30,11 @@
 #include <linux/tracepoint.h>
 #include "internal.h"
 
+/*
+ * 4MB minimal write chunk size
+ */
+#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_CACHE_SHIFT - 10))
+
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b30419cd425e..4e0a55493023 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -25,11 +25,6 @@ DECLARE_PER_CPU(int, dirty_throttle_leaks);
 #define DIRTY_SCOPE		8
 #define DIRTY_FULL_SCOPE	(DIRTY_SCOPE / 2)
 
-/*
- * 4MB minimal write chunk size
- */
-#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_CACHE_SHIFT - 10))
-
 struct backing_dev_info;
 
 /*
-- 
cgit v1.2.3


From 73de16db43f8dcb833ab032ed274b60b23676680 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@st.com>
Date: Tue, 8 Nov 2011 09:44:06 +0530
Subject: mfd: Add support for irq over gpio pin to stmpe

On many boards, stmpe is present as a separate device (not as part of SoC).
Here gpio lines are mostly used for getting interrupts. This patch adds in
support to handle irq over gpio pin.

Signed-off-by: Viresh Kumar <viresh.kumar@st.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/stmpe.c       | 36 +++++++++++++++++++++++++++++-------
 include/linux/mfd/stmpe.h |  7 +++++++
 2 files changed, 36 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 2963689cf45c..39efa629a19d 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -5,6 +5,7 @@
  * Author: Rabin Vincent <rabin.vincent@stericsson.com> for ST-Ericsson
  */
 
+#include <linux/gpio.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -877,9 +878,10 @@ static int __devinit stmpe_devices_init(struct stmpe *stmpe)
 static int stmpe_suspend(struct device *dev)
 {
 	struct i2c_client *i2c = to_i2c_client(dev);
+	struct stmpe *stmpe = i2c_get_clientdata(i2c);
 
 	if (device_may_wakeup(&i2c->dev))
-		enable_irq_wake(i2c->irq);
+		enable_irq_wake(stmpe->irq);
 
 	return 0;
 }
@@ -887,9 +889,10 @@ static int stmpe_suspend(struct device *dev)
 static int stmpe_resume(struct device *dev)
 {
 	struct i2c_client *i2c = to_i2c_client(dev);
+	struct stmpe *stmpe = i2c_get_clientdata(i2c);
 
 	if (device_may_wakeup(&i2c->dev))
-		disable_irq_wake(i2c->irq);
+		disable_irq_wake(stmpe->irq);
 
 	return 0;
 }
@@ -925,15 +928,28 @@ static int __devinit stmpe_probe(struct i2c_client *i2c,
 
 	i2c_set_clientdata(i2c, stmpe);
 
+	if (pdata->irq_over_gpio) {
+		ret = gpio_request_one(pdata->irq_gpio, GPIOF_DIR_IN, "stmpe");
+		if (ret) {
+			dev_err(stmpe->dev, "failed to request IRQ GPIO: %d\n",
+					ret);
+			goto out_free;
+		}
+
+		stmpe->irq = gpio_to_irq(pdata->irq_gpio);
+	} else {
+		stmpe->irq = i2c->irq;
+	}
+
 	ret = stmpe_chip_init(stmpe);
 	if (ret)
-		goto out_free;
+		goto free_gpio;
 
 	ret = stmpe_irq_init(stmpe);
 	if (ret)
-		goto out_free;
+		goto free_gpio;
 
-	ret = request_threaded_irq(stmpe->i2c->irq, NULL, stmpe_irq,
+	ret = request_threaded_irq(stmpe->irq, NULL, stmpe_irq,
 				   pdata->irq_trigger | IRQF_ONESHOT,
 				   "stmpe", stmpe);
 	if (ret) {
@@ -951,9 +967,12 @@ static int __devinit stmpe_probe(struct i2c_client *i2c,
 
 out_removedevs:
 	mfd_remove_devices(stmpe->dev);
-	free_irq(stmpe->i2c->irq, stmpe);
+	free_irq(stmpe->irq, stmpe);
 out_removeirq:
 	stmpe_irq_remove(stmpe);
+free_gpio:
+	if (pdata->irq_over_gpio)
+		gpio_free(pdata->irq_gpio);
 out_free:
 	kfree(stmpe);
 	return ret;
@@ -965,9 +984,12 @@ static int __devexit stmpe_remove(struct i2c_client *client)
 
 	mfd_remove_devices(stmpe->dev);
 
-	free_irq(stmpe->i2c->irq, stmpe);
+	free_irq(stmpe->irq, stmpe);
 	stmpe_irq_remove(stmpe);
 
+	if (stmpe->pdata->irq_over_gpio)
+		gpio_free(stmpe->pdata->irq_gpio);
+
 	kfree(stmpe);
 
 	return 0;
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index be1af7c42e57..270d6613aadf 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -61,6 +61,7 @@ struct stmpe_variant_info;
  * @variant: the detected STMPE model number
  * @regs: list of addresses of registers which are at different addresses on
  *	  different variants.  Indexed by one of STMPE_IDX_*.
+ * @irq: irq number for stmpe
  * @irq_base: starting IRQ number for internal IRQs
  * @num_gpios: number of gpios, differs for variants
  * @ier: cache of IER registers for bus_lock
@@ -76,6 +77,7 @@ struct stmpe {
 	struct stmpe_variant_info *variant;
 	const u8 *regs;
 
+	int irq;
 	int irq_base;
 	int num_gpios;
 	u8 ier[2];
@@ -183,6 +185,9 @@ struct stmpe_ts_platform_data {
  * @autosleep_timeout: inactivity timeout in milliseconds for autosleep
  * @irq_base: base IRQ number.  %STMPE_NR_IRQS irqs will be used, or
  *	      %STMPE_NR_INTERNAL_IRQS if the GPIO driver is not used.
+ * @irq_over_gpio: true if gpio is used to get irq
+ * @irq_gpio: gpio number over which irq will be requested (significant only if
+ *	      irq_over_gpio is true)
  * @gpio: GPIO-specific platform data
  * @keypad: keypad-specific platform data
  * @ts: touchscreen-specific platform data
@@ -194,6 +199,8 @@ struct stmpe_platform_data {
 	unsigned int irq_trigger;
 	bool irq_invert_polarity;
 	bool autosleep;
+	bool irq_over_gpio;
+	int irq_gpio;
 	int autosleep_timeout;
 
 	struct stmpe_gpio_platform_data *gpio;
-- 
cgit v1.2.3


From 289aabdaf943f3676a16908e2c3cc1a1f9877ccb Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 3 Nov 2011 13:41:14 +0000
Subject: mfd: Disable more pulls on WM8994

Disable more pulls by default on WM8994 for a small current saving. Since
some designs do leave SPKMODE floating provide platform data to allow that
to be left enabled.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/wm8994-core.c        | 11 ++++++++---
 include/linux/mfd/wm8994/pdata.h |  6 ++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index ff373dcda2c7..776298b313ab 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -374,6 +374,7 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 	struct wm8994_pdata *pdata = wm8994->dev->platform_data;
 	const char *devname;
 	int ret, i;
+	int pulls = 0;
 
 	dev_set_drvdata(wm8994->dev, wm8994);
 
@@ -516,12 +517,16 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 		}
 
 		wm8994->ldo_ena_always_driven = pdata->ldo_ena_always_driven;
+
+		if (pdata->spkmode_pu)
+			pulls |= WM8994_SPKMODE_PU;
 	}
 
-	/* Disable LDO pulldowns while the device is active */
+	/* Disable unneeded pulls */
 	wm8994_set_bits(wm8994, WM8994_PULL_CONTROL_2,
-			WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD,
-			0);
+			WM8994_LDO1ENA_PD | WM8994_LDO2ENA_PD |
+			WM8994_SPKMODE_PU | WM8994_CSNADDR_PD,
+			pulls);
 
 	/* In some system designs where the regulators are not in use,
 	 * we can achieve a small reduction in leakage currents by
diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index ea32f306dca6..54e2fef587d5 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -174,6 +174,12 @@ struct wm8994_pdata {
 	 * consumption will rise.
 	 */
 	bool ldo_ena_always_driven;
+
+	/*
+	 * SPKMODE must be pulled internally by the device on this
+	 * system.
+	 */
+	bool spkmode_pu;
 };
 
 #endif
-- 
cgit v1.2.3


From b46a36c0e0adc92c8be2c8a6fa68d979f6eee124 Mon Sep 17 00:00:00 2001
From: "Jett.Zhou" <jtzhou@marvell.com>
Date: Fri, 11 Nov 2011 15:38:27 +0800
Subject: mfd: Convert 88pm860x to use regmap api

Convert the 88pm860x normal bank register read/write to
use the register map API.

Signed-off-by: Jett.Zhou <jtzhou@marvell.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/88pm860x-i2c.c   | 105 ++++++++++++++++++-------------------------
 drivers/mfd/Kconfig          |   1 +
 include/linux/mfd/88pm860x.h |   3 +-
 3 files changed, 47 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/88pm860x-i2c.c b/drivers/mfd/88pm860x-i2c.c
index f629d6f4e3e9..630f1b545fc4 100644
--- a/drivers/mfd/88pm860x-i2c.c
+++ b/drivers/mfd/88pm860x-i2c.c
@@ -12,51 +12,20 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/regmap.h>
 #include <linux/mfd/88pm860x.h>
 #include <linux/slab.h>
 
-static inline int pm860x_read_device(struct i2c_client *i2c,
-				     int reg, int bytes, void *dest)
-{
-	unsigned char data;
-	int ret;
-
-	data = (unsigned char)reg;
-	ret = i2c_master_send(i2c, &data, 1);
-	if (ret < 0)
-		return ret;
-
-	ret = i2c_master_recv(i2c, dest, bytes);
-	if (ret < 0)
-		return ret;
-	return 0;
-}
-
-static inline int pm860x_write_device(struct i2c_client *i2c,
-				      int reg, int bytes, void *src)
-{
-	unsigned char buf[bytes + 1];
-	int ret;
-
-	buf[0] = (unsigned char)reg;
-	memcpy(&buf[1], src, bytes);
-
-	ret = i2c_master_send(i2c, buf, bytes + 1);
-	if (ret < 0)
-		return ret;
-	return 0;
-}
-
 int pm860x_reg_read(struct i2c_client *i2c, int reg)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
-	unsigned char data;
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
+	unsigned int data;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_read_device(i2c, reg, 1, &data);
-	mutex_unlock(&chip->io_lock);
-
+	ret = regmap_read(map, reg, &data);
 	if (ret < 0)
 		return ret;
 	else
@@ -68,12 +37,11 @@ int pm860x_reg_write(struct i2c_client *i2c, int reg,
 		     unsigned char data)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_write_device(i2c, reg, 1, &data);
-	mutex_unlock(&chip->io_lock);
-
+	ret = regmap_write(map, reg, data);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_reg_write);
@@ -82,12 +50,11 @@ int pm860x_bulk_read(struct i2c_client *i2c, int reg,
 		     int count, unsigned char *buf)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_read_device(i2c, reg, count, buf);
-	mutex_unlock(&chip->io_lock);
-
+	ret = regmap_raw_read(map, reg, buf, count);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_bulk_read);
@@ -96,12 +63,11 @@ int pm860x_bulk_write(struct i2c_client *i2c, int reg,
 		      int count, unsigned char *buf)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_write_device(i2c, reg, count, buf);
-	mutex_unlock(&chip->io_lock);
-
+	ret = regmap_raw_write(map, reg, buf, count);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_bulk_write);
@@ -110,18 +76,11 @@ int pm860x_set_bits(struct i2c_client *i2c, int reg,
 		    unsigned char mask, unsigned char data)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
-	unsigned char value;
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_read_device(i2c, reg, 1, &value);
-	if (ret < 0)
-		goto out;
-	value &= ~mask;
-	value |= data;
-	ret = pm860x_write_device(i2c, reg, 1, &value);
-out:
-	mutex_unlock(&chip->io_lock);
+	ret = regmap_update_bits(map, reg, mask, data);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_set_bits);
@@ -300,11 +259,17 @@ static int verify_addr(struct i2c_client *i2c)
 	return 0;
 }
 
+static struct regmap_config pm860x_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
+
 static int __devinit pm860x_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
 	struct pm860x_platform_data *pdata = client->dev.platform_data;
 	struct pm860x_chip *chip;
+	int ret;
 
 	if (!pdata) {
 		pr_info("No platform data in %s!\n", __func__);
@@ -316,10 +281,16 @@ static int __devinit pm860x_probe(struct i2c_client *client,
 		return -ENOMEM;
 
 	chip->id = verify_addr(client);
+	chip->regmap = regmap_init_i2c(client, &pm860x_regmap_config);
+	if (IS_ERR(chip->regmap)) {
+		ret = PTR_ERR(chip->regmap);
+		dev_err(&client->dev, "Failed to allocate register map: %d\n",
+				ret);
+		return ret;
+	}
 	chip->client = client;
 	i2c_set_clientdata(client, chip);
 	chip->dev = &client->dev;
-	mutex_init(&chip->io_lock);
 	dev_set_drvdata(chip->dev, chip);
 
 	/*
@@ -333,6 +304,14 @@ static int __devinit pm860x_probe(struct i2c_client *client,
 		chip->companion_addr = pdata->companion_addr;
 		chip->companion = i2c_new_dummy(chip->client->adapter,
 						chip->companion_addr);
+		chip->regmap_companion = regmap_init_i2c(chip->companion,
+							&pm860x_regmap_config);
+		if (IS_ERR(chip->regmap_companion)) {
+			ret = PTR_ERR(chip->regmap_companion);
+			dev_err(&chip->companion->dev,
+				"Failed to allocate register map: %d\n", ret);
+			return ret;
+		}
 		i2c_set_clientdata(chip->companion, chip);
 	}
 
@@ -345,7 +324,11 @@ static int __devexit pm860x_remove(struct i2c_client *client)
 	struct pm860x_chip *chip = i2c_get_clientdata(client);
 
 	pm860x_device_exit(chip);
-	i2c_unregister_device(chip->companion);
+	if (chip->companion) {
+		regmap_exit(chip->regmap_companion);
+		i2c_unregister_device(chip->companion);
+	}
+	regmap_exit(chip->regmap);
 	kfree(chip);
 	return 0;
 }
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index f1391c21ef26..c9acd32fc0a4 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -12,6 +12,7 @@ config MFD_CORE
 config MFD_88PM860X
 	bool "Support Marvell 88PM8606/88PM8607"
 	depends on I2C=y && GENERIC_HARDIRQS
+	select REGMAP_I2C
 	select MFD_CORE
 	help
 	  This supports for Marvell 88PM8606/88PM8607 Power Management IC.
diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h
index 63b4fb8e3b6f..92be3476c9f5 100644
--- a/include/linux/mfd/88pm860x.h
+++ b/include/linux/mfd/88pm860x.h
@@ -297,10 +297,11 @@ enum {
 
 struct pm860x_chip {
 	struct device		*dev;
-	struct mutex		io_lock;
 	struct mutex		irq_lock;
 	struct i2c_client	*client;
 	struct i2c_client	*companion;	/* companion chip client */
+	struct regmap           *regmap;
+	struct regmap           *regmap_companion;
 
 	int			buck3_double;	/* DVC ramp slope double */
 	unsigned short		companion_addr;
-- 
cgit v1.2.3


From 2439d9714e8f4f106c8965e093f9892cfb347d76 Mon Sep 17 00:00:00 2001
From: Donggeun Kim <dg77.kim@samsung.com>
Date: Thu, 24 Nov 2011 18:12:17 +0900
Subject: mfd: Add platform data for MAX8997 MUIC driver

Currently, the MAX8997 driver does not support the MUIC function of the
device. To add a MAX8997 MUIC driver, the header file needs to be updated.

Signed-off-by: Donggeun Kim <dg77.kim@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/max8997.h | 58 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index 0bbd13dbe336..49d2a0bfd7fe 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h
@@ -77,6 +77,60 @@ struct max8997_regulator_data {
 	struct regulator_init_data *initdata;
 };
 
+enum max8997_muic_usb_type {
+	MAX8997_USB_HOST,
+	MAX8997_USB_DEVICE,
+};
+
+enum max8997_muic_charger_type {
+	MAX8997_CHARGER_TYPE_NONE = 0,
+	MAX8997_CHARGER_TYPE_USB,
+	MAX8997_CHARGER_TYPE_DOWNSTREAM_PORT,
+	MAX8997_CHARGER_TYPE_DEDICATED_CHG,
+	MAX8997_CHARGER_TYPE_500MA,
+	MAX8997_CHARGER_TYPE_1A,
+	MAX8997_CHARGER_TYPE_DEAD_BATTERY = 7,
+};
+
+struct max8997_muic_reg_data {
+	u8 addr;
+	u8 data;
+};
+
+/**
+ * struct max8997_muic_platform_data
+ * @usb_callback: callback function for USB
+ *		  inform callee of USB type (HOST or DEVICE)
+ *		  and attached state(true or false)
+ * @charger_callback: callback function for charger
+ *		  inform callee of charger_type
+ *		  and attached state(true or false)
+ * @deskdock_callback: callback function for desk dock
+ *		  inform callee of attached state(true or false)
+ * @cardock_callback: callback function for car dock
+ *		  inform callee of attached state(true or false)
+ * @mhl_callback: callback function for MHL (Mobile High-definition Link)
+ *		  inform callee of attached state(true or false)
+ * @uart_callback: callback function for JIG UART
+ *		   inform callee of attached state(true or false)
+ * @init_data: array of max8997_muic_reg_data
+ *	       used for initializing registers of MAX8997 MUIC device
+ * @num_init_data: array size of init_data
+ */
+struct max8997_muic_platform_data {
+	void (*usb_callback)(enum max8997_muic_usb_type usb_type,
+		bool attached);
+	void (*charger_callback)(bool attached,
+		enum max8997_muic_charger_type charger_type);
+	void (*deskdock_callback) (bool attached);
+	void (*cardock_callback) (bool attached);
+	void (*mhl_callback) (bool attached);
+	void (*uart_callback) (bool attached);
+
+	struct max8997_muic_reg_data *init_data;
+	int num_init_data;
+};
+
 struct max8997_platform_data {
 	/* IRQ */
 	int irq_base;
@@ -113,7 +167,9 @@ struct max8997_platform_data {
 	/* charge Full Timeout */
 	int timeout; /* 0 (no timeout), 5, 6, 7 hours */
 
-	/* MUIC: Not implemented */
+	/* ---- MUIC ---- */
+	struct max8997_muic_platform_data *muic_pdata;
+
 	/* HAPTIC: Not implemented */
 	/* RTC: Not implemented */
 	/* Flash: Not implemented */
-- 
cgit v1.2.3


From 2161891a0a7bcad6ee8819bb324ee4a031bc8a95 Mon Sep 17 00:00:00 2001
From: Robin van der Gracht <robin@protonic.nl>
Date: Tue, 29 Nov 2011 12:09:03 +0100
Subject: mfd: Fixed unconditional reset of the mc13xxx ADC reading enable bits

When the ADC is being prepared for a single or multiple channel reading,
the adc0 register is reconfigured without taking the lithium cell, charge
current and battery current reading enable bits into account, which results
in those bits being cleared.

Signed-off-by: Robin van der Gracht <robin@protonic.nl>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/mc13xxx-core.c  | 4 ++--
 include/linux/mfd/mc13xxx.h | 8 ++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c
index 441738048458..d0d3dfafba5c 100644
--- a/drivers/mfd/mc13xxx-core.c
+++ b/drivers/mfd/mc13xxx-core.c
@@ -615,13 +615,13 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode,
 		break;
 
 	case MC13XXX_ADC_MODE_SINGLE_CHAN:
-		adc0 |= old_adc0 & MC13XXX_ADC0_TSMOD_MASK;
+		adc0 |= old_adc0 & MC13XXX_ADC0_CONFIG_MASK;
 		adc1 |= (channel & 0x7) << MC13XXX_ADC1_CHAN0_SHIFT;
 		adc1 |= MC13XXX_ADC1_RAND;
 		break;
 
 	case MC13XXX_ADC_MODE_MULT_CHAN:
-		adc0 |= old_adc0 & MC13XXX_ADC0_TSMOD_MASK;
+		adc0 |= old_adc0 & MC13XXX_ADC0_CONFIG_MASK;
 		adc1 |= 4 << MC13XXX_ADC1_CHAN1_SHIFT;
 		break;
 
diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h
index 3816c2fac0ad..261fc117b40a 100644
--- a/include/linux/mfd/mc13xxx.h
+++ b/include/linux/mfd/mc13xxx.h
@@ -173,6 +173,9 @@ struct mc13xxx_platform_data {
 #define MC13XXX_ADC_MODE_MULT_CHAN	3
 
 #define MC13XXX_ADC0		43
+#define MC13XXX_ADC0_LICELLCON		(1 << 0)
+#define MC13XXX_ADC0_CHRGICON		(1 << 1)
+#define MC13XXX_ADC0_BATICON		(1 << 2)
 #define MC13XXX_ADC0_ADREFEN		(1 << 10)
 #define MC13XXX_ADC0_TSMOD0		(1 << 12)
 #define MC13XXX_ADC0_TSMOD1		(1 << 13)
@@ -184,4 +187,9 @@ struct mc13xxx_platform_data {
 					MC13XXX_ADC0_TSMOD1 | \
 					MC13XXX_ADC0_TSMOD2)
 
+#define MC13XXX_ADC0_CONFIG_MASK	(MC13XXX_ADC0_TSMOD_MASK | \
+					MC13XXX_ADC0_LICELLCON | \
+					MC13XXX_ADC0_CHRGICON | \
+					MC13XXX_ADC0_BATICON)
+
 #endif /* ifndef __LINUX_MFD_MC13XXX_H */
-- 
cgit v1.2.3


From 5dd7bf59e0e8563265b3e5b33276099ef628fcc7 Mon Sep 17 00:00:00 2001
From: Jochen Friedrich <jochen@scram.de>
Date: Sun, 27 Nov 2011 22:00:54 +0100
Subject: ARM: sa11x0: Implement autoloading of codec and codec pdata for mcp
 bus.

Signed-off-by: Jochen Friedrich <jochen@scram.de>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 arch/arm/mach-sa1100/assabet.c          |  1 +
 arch/arm/mach-sa1100/cerf.c             |  1 +
 arch/arm/mach-sa1100/collie.c           |  8 +++++-
 arch/arm/mach-sa1100/include/mach/mcp.h |  2 ++
 arch/arm/mach-sa1100/lart.c             |  1 +
 arch/arm/mach-sa1100/shannon.c          |  1 +
 arch/arm/mach-sa1100/simpad.c           |  8 +++++-
 drivers/mfd/mcp-core.c                  | 44 ++++++++++++++++++++++++++++--
 drivers/mfd/mcp-sa11x0.c                |  7 +++--
 drivers/mfd/ucb1x00-core.c              | 48 +++++++++++++++++++++++++--------
 drivers/mfd/ucb1x00-ts.c                |  2 +-
 include/linux/mfd/mcp.h                 |  7 +++--
 include/linux/mfd/ucb1x00.h             |  5 +++-
 include/linux/mod_devicetable.h         | 11 ++++++++
 scripts/mod/file2alias.c                | 13 +++++++++
 15 files changed, 138 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index 3dd133f18415..14b31f116ef9 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -202,6 +202,7 @@ static struct irda_platform_data assabet_irda_data = {
 static struct mcp_plat_data assabet_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
+	.codec		= "ucb1x00",
 };
 
 static void __init assabet_init(void)
diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c
index 7f3da4b11ec9..b7db7cd08305 100644
--- a/arch/arm/mach-sa1100/cerf.c
+++ b/arch/arm/mach-sa1100/cerf.c
@@ -124,6 +124,7 @@ static void __init cerf_map_io(void)
 static struct mcp_plat_data cerf_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
+	.codec		= "ucb1x00",
 };
 
 static void __init cerf_init(void)
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 2965cc9d424e..b0b5efee683b 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -27,6 +27,7 @@
 #include <linux/timer.h>
 #include <linux/gpio.h>
 #include <linux/pda_power.h>
+#include <linux/mfd/ucb1x00.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -85,10 +86,15 @@ static struct scoop_pcmcia_config collie_pcmcia_config = {
 	.num_devs	= 1,
 };
 
+static struct ucb1x00_plat_data collie_ucb1x00_data = {
+	.gpio_base	= COLLIE_TC35143_GPIO_BASE,
+};
+
 static struct mcp_plat_data collie_mcp_data = {
 	.mccr0		= MCCR0_ADM | MCCR0_ExtClk,
 	.sclk_rate	= 9216000,
-	.gpio_base	= COLLIE_TC35143_GPIO_BASE,
+	.codec		= "ucb1x00",
+	.codec_pdata	= &collie_ucb1x00_data,
 };
 
 /*
diff --git a/arch/arm/mach-sa1100/include/mach/mcp.h b/arch/arm/mach-sa1100/include/mach/mcp.h
index ed1a331508a7..586cec898b35 100644
--- a/arch/arm/mach-sa1100/include/mach/mcp.h
+++ b/arch/arm/mach-sa1100/include/mach/mcp.h
@@ -17,6 +17,8 @@ struct mcp_plat_data {
 	u32 mccr1;
 	unsigned int sclk_rate;
 	int gpio_base;
+	const char *codec;
+	void *codec_pdata;
 };
 
 #endif
diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c
index 5bc59d0947ba..34bbdd986e43 100644
--- a/arch/arm/mach-sa1100/lart.c
+++ b/arch/arm/mach-sa1100/lart.c
@@ -24,6 +24,7 @@
 static struct mcp_plat_data lart_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
+	.codec		= "ucb1x00",
 };
 
 static void __init lart_init(void)
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index 1cccbf5b9e9a..252faa5e2395 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -55,6 +55,7 @@ static struct resource shannon_flash_resource = {
 static struct mcp_plat_data shannon_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
+	.codec		= "ucb1x00",
 };
 
 static void __init shannon_init(void)
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index 4790f3f3d008..7eac8ebab94e 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -14,6 +14,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/io.h>
 #include <linux/gpio.h>
+#include <linux/mfd/ucb1x00.h>
 
 #include <asm/irq.h>
 #include <mach/hardware.h>
@@ -187,10 +188,15 @@ static struct resource simpad_flash_resources [] = {
 	}
 };
 
+static struct ucb1x00_plat_data simpad_ucb1x00_data = {
+	.gpio_base	= SIMPAD_UCB1X00_GPIO_BASE,
+};
+
 static struct mcp_plat_data simpad_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.gpio_base	= SIMPAD_UCB1X00_GPIO_BASE,
+	.codec		= "ucb1300",
+	.codec_pdata	= &simpad_ucb1x00_data,
 };
 
 
diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c
index 84815f9ef636..63be60bc3455 100644
--- a/drivers/mfd/mcp-core.c
+++ b/drivers/mfd/mcp-core.c
@@ -26,9 +26,35 @@
 #define to_mcp(d)		container_of(d, struct mcp, attached_device)
 #define to_mcp_driver(d)	container_of(d, struct mcp_driver, drv)
 
+static const struct mcp_device_id *mcp_match_id(const struct mcp_device_id *id,
+						const char *codec)
+{
+	while (id->name[0]) {
+		if (strcmp(codec, id->name) == 0)
+			return id;
+		id++;
+	}
+	return NULL;
+}
+
+const struct mcp_device_id *mcp_get_device_id(const struct mcp *mcp)
+{
+	const struct mcp_driver *driver =
+		to_mcp_driver(mcp->attached_device.driver);
+
+	return mcp_match_id(driver->id_table, mcp->codec);
+}
+EXPORT_SYMBOL(mcp_get_device_id);
+
 static int mcp_bus_match(struct device *dev, struct device_driver *drv)
 {
-	return 1;
+	const struct mcp *mcp = to_mcp(dev);
+	const struct mcp_driver *driver = to_mcp_driver(drv);
+
+	if (driver->id_table)
+		return !!mcp_match_id(driver->id_table, mcp->codec);
+
+	return 0;
 }
 
 static int mcp_bus_probe(struct device *dev)
@@ -74,9 +100,18 @@ static int mcp_bus_resume(struct device *dev)
 	return ret;
 }
 
+static int mcp_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct mcp *mcp = to_mcp(dev);
+
+	add_uevent_var(env, "MODALIAS=%s%s", MCP_MODULE_PREFIX, mcp->codec);
+	return 0;
+}
+
 static struct bus_type mcp_bus_type = {
 	.name		= "mcp",
 	.match		= mcp_bus_match,
+	.uevent		= mcp_bus_uevent,
 	.probe		= mcp_bus_probe,
 	.remove		= mcp_bus_remove,
 	.suspend	= mcp_bus_suspend,
@@ -212,9 +247,14 @@ struct mcp *mcp_host_alloc(struct device *parent, size_t size)
 }
 EXPORT_SYMBOL(mcp_host_alloc);
 
-int mcp_host_register(struct mcp *mcp)
+int mcp_host_register(struct mcp *mcp, void *pdata)
 {
+	if (!mcp->codec)
+		return -EINVAL;
+
+	mcp->attached_device.platform_data = pdata;
 	dev_set_name(&mcp->attached_device, "mcp0");
+	request_module("%s%s", MCP_MODULE_PREFIX, mcp->codec);
 	return device_register(&mcp->attached_device);
 }
 EXPORT_SYMBOL(mcp_host_register);
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index 02c53a0766c4..da4e077a1bee 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -146,6 +146,9 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	if (!data)
 		return -ENODEV;
 
+	if (!data->codec)
+		return -ENODEV;
+
 	if (!request_mem_region(0x80060000, 0x60, "sa11x0-mcp"))
 		return -EBUSY;
 
@@ -162,7 +165,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	mcp->dma_audio_wr	= DMA_Ser4MCP0Wr;
 	mcp->dma_telco_rd	= DMA_Ser4MCP1Rd;
 	mcp->dma_telco_wr	= DMA_Ser4MCP1Wr;
-	mcp->gpio_base		= data->gpio_base;
+	mcp->codec		= data->codec;
 
 	platform_set_drvdata(pdev, mcp);
 
@@ -195,7 +198,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	mcp->rw_timeout = (64 * 3 * 1000000 + mcp->sclk_rate - 1) /
 			  mcp->sclk_rate;
 
-	ret = mcp_host_register(mcp);
+	ret = mcp_host_register(mcp, data->codec_pdata);
 	if (ret == 0)
 		goto out;
 
diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c
index b281217334eb..91c4f25e0e55 100644
--- a/drivers/mfd/ucb1x00-core.c
+++ b/drivers/mfd/ucb1x00-core.c
@@ -36,6 +36,15 @@ static DEFINE_MUTEX(ucb1x00_mutex);
 static LIST_HEAD(ucb1x00_drivers);
 static LIST_HEAD(ucb1x00_devices);
 
+static struct mcp_device_id ucb1x00_id[] = {
+	{ "ucb1x00", 0 },  /* auto-detection */
+	{ "ucb1200", UCB_ID_1200 },
+	{ "ucb1300", UCB_ID_1300 },
+	{ "tc35143", UCB_ID_TC35143 },
+	{ }
+};
+MODULE_DEVICE_TABLE(mcp, ucb1x00_id);
+
 /**
  *	ucb1x00_io_set_dir - set IO direction
  *	@ucb: UCB1x00 structure describing chip
@@ -527,17 +536,33 @@ static struct class ucb1x00_class = {
 
 static int ucb1x00_probe(struct mcp *mcp)
 {
+	const struct mcp_device_id *mid;
 	struct ucb1x00 *ucb;
 	struct ucb1x00_driver *drv;
+	struct ucb1x00_plat_data *pdata;
 	unsigned int id;
 	int ret = -ENODEV;
 	int temp;
 
 	mcp_enable(mcp);
 	id = mcp_reg_read(mcp, UCB_ID);
+	mid = mcp_get_device_id(mcp);
 
-	if (id != UCB_ID_1200 && id != UCB_ID_1300 && id != UCB_ID_TC35143) {
-		printk(KERN_WARNING "UCB1x00 ID not found: %04x\n", id);
+	if (mid && mid->driver_data) {
+		if (id != mid->driver_data) {
+			printk(KERN_WARNING "%s wrong ID %04x found: %04x\n",
+				mid->name, (unsigned int) mid->driver_data, id);
+			goto err_disable;
+		}
+	} else {
+		mid = &ucb1x00_id[1];
+		while (mid->driver_data) {
+			if (id == mid->driver_data)
+				break;
+			mid++;
+		}
+		printk(KERN_WARNING "%s ID not found: %04x\n",
+			ucb1x00_id[0].name, id);
 		goto err_disable;
 	}
 
@@ -546,28 +571,28 @@ static int ucb1x00_probe(struct mcp *mcp)
 	if (!ucb)
 		goto err_disable;
 
-
+	pdata = mcp->attached_device.platform_data;
 	ucb->dev.class = &ucb1x00_class;
 	ucb->dev.parent = &mcp->attached_device;
-	dev_set_name(&ucb->dev, "ucb1x00");
+	dev_set_name(&ucb->dev, mid->name);
 
 	spin_lock_init(&ucb->lock);
 	spin_lock_init(&ucb->io_lock);
 	sema_init(&ucb->adc_sem, 1);
 
-	ucb->id  = id;
+	ucb->id  = mid;
 	ucb->mcp = mcp;
 	ucb->irq = ucb1x00_detect_irq(ucb);
 	if (ucb->irq == NO_IRQ) {
-		printk(KERN_ERR "UCB1x00: IRQ probe failed\n");
+		printk(KERN_ERR "%s: IRQ probe failed\n", mid->name);
 		ret = -ENODEV;
 		goto err_free;
 	}
 
 	ucb->gpio.base = -1;
-	if (mcp->gpio_base != 0) {
+	if (pdata && (pdata->gpio_base >= 0)) {
 		ucb->gpio.label = dev_name(&ucb->dev);
-		ucb->gpio.base = mcp->gpio_base;
+		ucb->gpio.base = pdata->gpio_base;
 		ucb->gpio.ngpio = 10;
 		ucb->gpio.set = ucb1x00_gpio_set;
 		ucb->gpio.get = ucb1x00_gpio_get;
@@ -580,10 +605,10 @@ static int ucb1x00_probe(struct mcp *mcp)
 		dev_info(&ucb->dev, "gpio_base not set so no gpiolib support");
 
 	ret = request_irq(ucb->irq, ucb1x00_irq, IRQF_TRIGGER_RISING,
-			  "UCB1x00", ucb);
+			  mid->name, ucb);
 	if (ret) {
-		printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n",
-			ucb->irq, ret);
+		printk(KERN_ERR "%s: unable to grab irq%d: %d\n",
+			mid->name, ucb->irq, ret);
 		goto err_gpio;
 	}
 
@@ -705,6 +730,7 @@ static struct mcp_driver ucb1x00_driver = {
 	.remove		= ucb1x00_remove,
 	.suspend	= ucb1x00_suspend,
 	.resume		= ucb1x00_resume,
+	.id_table	= ucb1x00_id,
 };
 
 static int __init ucb1x00_init(void)
diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index 38ffbd50a0d2..40ec3c118868 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c
@@ -382,7 +382,7 @@ static int ucb1x00_ts_add(struct ucb1x00_dev *dev)
 	ts->adcsync = adcsync ? UCB_SYNC : UCB_NOSYNC;
 
 	idev->name       = "Touchscreen panel";
-	idev->id.product = ts->ucb->id;
+	idev->id.product = ts->ucb->id->driver_data;
 	idev->open       = ucb1x00_ts_open;
 	idev->close      = ucb1x00_ts_close;
 
diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h
index ee496708e38b..1515e64e3663 100644
--- a/include/linux/mfd/mcp.h
+++ b/include/linux/mfd/mcp.h
@@ -10,6 +10,7 @@
 #ifndef MCP_H
 #define MCP_H
 
+#include <linux/mod_devicetable.h>
 #include <mach/dma.h>
 
 struct mcp_ops;
@@ -26,7 +27,7 @@ struct mcp {
 	dma_device_t	dma_telco_rd;
 	dma_device_t	dma_telco_wr;
 	struct device	attached_device;
-	int		gpio_base;
+	const char	*codec;
 };
 
 struct mcp_ops {
@@ -44,10 +45,11 @@ void mcp_reg_write(struct mcp *, unsigned int, unsigned int);
 unsigned int mcp_reg_read(struct mcp *, unsigned int);
 void mcp_enable(struct mcp *);
 void mcp_disable(struct mcp *);
+const struct mcp_device_id *mcp_get_device_id(const struct mcp *mcp);
 #define mcp_get_sclk_rate(mcp)	((mcp)->sclk_rate)
 
 struct mcp *mcp_host_alloc(struct device *, size_t);
-int mcp_host_register(struct mcp *);
+int mcp_host_register(struct mcp *, void *);
 void mcp_host_unregister(struct mcp *);
 
 struct mcp_driver {
@@ -56,6 +58,7 @@ struct mcp_driver {
 	void (*remove)(struct mcp *);
 	int (*suspend)(struct mcp *, pm_message_t);
 	int (*resume)(struct mcp *);
+	const struct mcp_device_id *id_table;
 };
 
 int mcp_driver_register(struct mcp_driver *);
diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h
index 4321f044d1e4..bc19e5fb7ea8 100644
--- a/include/linux/mfd/ucb1x00.h
+++ b/include/linux/mfd/ucb1x00.h
@@ -104,6 +104,9 @@
 #define UCB_MODE_DYN_VFLAG_ENA	(1 << 12)
 #define UCB_MODE_AUD_OFF_CAN	(1 << 13)
 
+struct ucb1x00_plat_data {
+	int		gpio_base;
+};
 
 struct ucb1x00_irq {
 	void *devid;
@@ -116,7 +119,7 @@ struct ucb1x00 {
 	unsigned int		irq;
 	struct semaphore	adc_sem;
 	spinlock_t		io_lock;
-	u16			id;
+	const struct mcp_device_id *id;
 	u16			io_dir;
 	u16			io_out;
 	u16			adc_cr;
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 468819cdde87..bc50d9a80d89 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -436,6 +436,17 @@ struct spi_device_id {
 			__attribute__((aligned(sizeof(kernel_ulong_t))));
 };
 
+/* mcp */
+
+#define MCP_NAME_SIZE	20
+#define MCP_MODULE_PREFIX "mcp:"
+
+struct mcp_device_id {
+	char name[MCP_NAME_SIZE];
+	kernel_ulong_t driver_data	/* Data private to the driver */
+			__attribute__((aligned(sizeof(kernel_ulong_t))));
+};
+
 /* dmi */
 enum dmi_field {
 	DMI_NONE,
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index f936d1fa969d..e8c7eb16c0ea 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -774,6 +774,15 @@ static int do_spi_entry(const char *filename, struct spi_device_id *id,
 	return 1;
 }
 
+/* Looks like: mcp:S */
+static int do_mcp_entry(const char *filename, struct mcp_device_id *id,
+			char *alias)
+{
+	sprintf(alias, MCP_MODULE_PREFIX "%s", id->name);
+
+	return 1;
+}
+
 static const struct dmifield {
 	const char *prefix;
 	int field;
@@ -1027,6 +1036,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info,
 		do_table(symval, sym->st_size,
 			 sizeof(struct spi_device_id), "spi",
 			 do_spi_entry, mod);
+	else if (sym_is(symname, "__mod_mcp_device_table"))
+		do_table(symval, sym->st_size,
+			 sizeof(struct mcp_device_id), "mcp",
+			 do_mcp_entry, mod);
 	else if (sym_is(symname, "__mod_dmi_device_table"))
 		do_table(symval, sym->st_size,
 			 sizeof(struct dmi_system_id), "dmi",
-- 
cgit v1.2.3


From bdd6a67a3936f4da5ef382ec640b8d1eb1f209fa Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Mon, 28 Nov 2011 23:26:47 +0000
Subject: mfd: Constify WM8994 regulator_init_data

The driver has no need to modify the regulator_init_data, so declare it
const to allow machine code to declare its init data const as well.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8994/pdata.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index 54e2fef587d5..b00897a6c461 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -23,7 +23,7 @@ struct wm8994_ldo_pdata {
 	int enable;
 
 	const char *supply;
-	struct regulator_init_data *init_data;
+	const struct regulator_init_data *init_data;
 };
 
 #define WM8994_CONFIGURE_GPIO 0x10000
-- 
cgit v1.2.3


From 83051b7287e43241ccb6adaaa92615a87274898b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Thu, 1 Dec 2011 13:56:53 +0000
Subject: mfd: Add missing mutex.h inclusion to WM8994 core.h

struct wm8994 includes a mutex so we need to include mutex.h before we
declare it. All current users rely on this being done implicitly.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/mfd/wm8994/core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index f44bdb7273bd..8eb1b75cb755 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -15,6 +15,7 @@
 #ifndef __MFD_WM8994_CORE_H__
 #define __MFD_WM8994_CORE_H__
 
+#include <linux/mutex.h>
 #include <linux/interrupt.h>
 
 enum wm8994_type {
-- 
cgit v1.2.3


From ee66e653ca7425bc8ffca4e00f19a8057cd14e4d Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 2 Dec 2011 14:16:33 +0100
Subject: mfd: Unify abx500 headers in mfd/abx500

This moves all the header files related to the abx500 family into
a common include directory below mfd. From now on we place any
subchip header in that directory. Headers previously in e.g.
<linux/mfd/ab8500/gpio.h> get prefixed and are now e.g.
<linux/mfd/abx500/ab8500-gpio.h>. The top-level abstract interface
remains in <linux/mfd/abx500.h>.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 arch/arm/mach-ux500/board-mop500.c                 |   4 +-
 arch/arm/mach-ux500/board-u5500.c                  |   2 +-
 .../mach-ux500/include/mach/irqs-board-mop500.h    |   2 +-
 drivers/input/misc/ab8500-ponkey.c                 |   2 +-
 drivers/mfd/ab5500-core.c                          |   2 +-
 drivers/mfd/ab5500-debugfs.c                       |   2 +-
 drivers/mfd/ab8500-core.c                          |   2 +-
 drivers/mfd/ab8500-debugfs.c                       |   2 +-
 drivers/mfd/ab8500-gpadc.c                         |   4 +-
 drivers/mfd/ab8500-i2c.c                           |   2 +-
 drivers/mfd/ab8500-sysctrl.c                       |   4 +-
 drivers/misc/ab8500-pwm.c                          |   2 +-
 drivers/regulator/ab8500.c                         |   2 +-
 drivers/rtc/rtc-ab8500.c                           |   2 +-
 drivers/usb/otg/ab8500-usb.c                       |   2 +-
 include/linux/mfd/ab5500/ab5500.h                  | 140 ------------
 include/linux/mfd/ab8500.h                         | 201 ----------------
 include/linux/mfd/ab8500/gpadc.h                   |  35 ---
 include/linux/mfd/ab8500/gpio.h                    |  21 --
 include/linux/mfd/ab8500/sysctrl.h                 | 254 ---------------------
 include/linux/mfd/abx500/ab5500.h                  | 140 ++++++++++++
 include/linux/mfd/abx500/ab8500-gpadc.h            |  35 +++
 include/linux/mfd/abx500/ab8500-gpio.h             |  21 ++
 include/linux/mfd/abx500/ab8500-sysctrl.h          | 254 +++++++++++++++++++++
 include/linux/mfd/abx500/ab8500.h                  | 201 ++++++++++++++++
 25 files changed, 669 insertions(+), 669 deletions(-)
 delete mode 100644 include/linux/mfd/ab5500/ab5500.h
 delete mode 100644 include/linux/mfd/ab8500.h
 delete mode 100644 include/linux/mfd/ab8500/gpadc.h
 delete mode 100644 include/linux/mfd/ab8500/gpio.h
 delete mode 100644 include/linux/mfd/ab8500/sysctrl.h
 create mode 100644 include/linux/mfd/abx500/ab5500.h
 create mode 100644 include/linux/mfd/abx500/ab8500-gpadc.h
 create mode 100644 include/linux/mfd/abx500/ab8500-gpio.h
 create mode 100644 include/linux/mfd/abx500/ab8500-sysctrl.h
 create mode 100644 include/linux/mfd/abx500/ab8500.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index bdd7b80dd7ad..80cef36d71ce 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -19,11 +19,11 @@
 #include <linux/amba/pl022.h>
 #include <linux/amba/serial.h>
 #include <linux/spi/spi.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/regulator/ab8500.h>
 #include <linux/mfd/tc3589x.h>
 #include <linux/mfd/tps6105x.h>
-#include <linux/mfd/ab8500/gpio.h>
+#include <linux/mfd/abx500/ab8500-gpio.h>
 #include <linux/leds-lp5521.h>
 #include <linux/input.h>
 #include <linux/smsc911x.h>
diff --git a/arch/arm/mach-ux500/board-u5500.c b/arch/arm/mach-ux500/board-u5500.c
index 82025ba70c03..4ecb07a93f14 100644
--- a/arch/arm/mach-ux500/board-u5500.c
+++ b/arch/arm/mach-ux500/board-u5500.c
@@ -10,7 +10,7 @@
 #include <linux/amba/bus.h>
 #include <linux/irq.h>
 #include <linux/i2c.h>
-#include <linux/mfd/ab5500/ab5500.h>
+#include <linux/mfd/abx500/ab5500.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h b/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h
index 47969909836c..d2d4131435a6 100644
--- a/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h
+++ b/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h
@@ -9,7 +9,7 @@
 #define __MACH_IRQS_BOARD_MOP500_H
 
 /* Number of AB8500 irqs is taken from header file */
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 
 #define MOP500_AB8500_IRQ_BASE		IRQ_BOARD_START
 #define MOP500_AB8500_IRQ_END		(MOP500_AB8500_IRQ_BASE \
diff --git a/drivers/input/misc/ab8500-ponkey.c b/drivers/input/misc/ab8500-ponkey.c
index 3d3288a78fdc..3f199e1539bf 100644
--- a/drivers/input/misc/ab8500-ponkey.c
+++ b/drivers/input/misc/ab8500-ponkey.c
@@ -12,7 +12,7 @@
 #include <linux/platform_device.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/slab.h>
 
 /**
diff --git a/drivers/mfd/ab5500-core.c b/drivers/mfd/ab5500-core.c
index ec10629a0b0b..bd56a764dea1 100644
--- a/drivers/mfd/ab5500-core.c
+++ b/drivers/mfd/ab5500-core.c
@@ -22,8 +22,8 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/random.h>
-#include <linux/mfd/ab5500/ab5500.h>
 #include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab5500.h>
 #include <linux/list.h>
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
diff --git a/drivers/mfd/ab5500-debugfs.c b/drivers/mfd/ab5500-debugfs.c
index b7b2d3483fd4..72006940937a 100644
--- a/drivers/mfd/ab5500-debugfs.c
+++ b/drivers/mfd/ab5500-debugfs.c
@@ -7,8 +7,8 @@
 #include <linux/module.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
-#include <linux/mfd/ab5500/ab5500.h>
 #include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab5500.h>
 #include <linux/uaccess.h>
 
 #include "ab5500-core.h"
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index d3d572b2317b..53e2a80f42fa 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -17,7 +17,7 @@
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/regulator/ab8500.h>
 
 /*
diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c
index dedb7f65cea6..9a0211aa8897 100644
--- a/drivers/mfd/ab8500-debugfs.c
+++ b/drivers/mfd/ab8500-debugfs.c
@@ -13,7 +13,7 @@
 #include <linux/platform_device.h>
 
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 
 static u32 debug_bank;
 static u32 debug_address;
diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c
index e985d1701a83..c39fc716e1dc 100644
--- a/drivers/mfd/ab8500-gpadc.c
+++ b/drivers/mfd/ab8500-gpadc.c
@@ -18,9 +18,9 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/list.h>
-#include <linux/mfd/ab8500.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500/gpadc.h>
+#include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-gpadc.h>
 
 /*
  * GPADC register offsets
diff --git a/drivers/mfd/ab8500-i2c.c b/drivers/mfd/ab8500-i2c.c
index 9be541c6b004..087fecd71ce0 100644
--- a/drivers/mfd/ab8500-i2c.c
+++ b/drivers/mfd/ab8500-i2c.c
@@ -10,7 +10,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/mfd/db8500-prcmu.h>
 
 static int ab8500_i2c_write(struct ab8500 *ab8500, u16 addr, u8 data)
diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c
index f20feefac190..c28d4eb1eff0 100644
--- a/drivers/mfd/ab8500-sysctrl.c
+++ b/drivers/mfd/ab8500-sysctrl.c
@@ -7,9 +7,9 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-#include <linux/mfd/ab8500.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500/sysctrl.h>
+#include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-sysctrl.h>
 
 static struct device *sysctrl_dev;
 
diff --git a/drivers/misc/ab8500-pwm.c b/drivers/misc/ab8500-pwm.c
index 2208a9d52622..d7a9aa14e5d5 100644
--- a/drivers/misc/ab8500-pwm.c
+++ b/drivers/misc/ab8500-pwm.c
@@ -8,8 +8,8 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/pwm.h>
-#include <linux/mfd/ab8500.h>
 #include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/module.h>
 
 /*
diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c
index 6e1ae69646b3..80d08237a5a7 100644
--- a/drivers/regulator/ab8500.c
+++ b/drivers/regulator/ab8500.c
@@ -16,8 +16,8 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/platform_device.h>
-#include <linux/mfd/ab8500.h>
 #include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include <linux/regulator/ab8500.h>
diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c
index e346705aae92..db16ce212d6b 100644
--- a/drivers/rtc/rtc-ab8500.c
+++ b/drivers/rtc/rtc-ab8500.c
@@ -15,7 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/delay.h>
 
 #define AB8500_RTC_SOFF_STAT_REG	0x00
diff --git a/drivers/usb/otg/ab8500-usb.c b/drivers/usb/otg/ab8500-usb.c
index 07ccea9ada40..74fe6e62e0f7 100644
--- a/drivers/usb/otg/ab8500-usb.c
+++ b/drivers/usb/otg/ab8500-usb.c
@@ -30,7 +30,7 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 
 #define AB8500_MAIN_WD_CTRL_REG 0x01
 #define AB8500_USB_LINE_STAT_REG 0x80
diff --git a/include/linux/mfd/ab5500/ab5500.h b/include/linux/mfd/ab5500/ab5500.h
deleted file mode 100644
index a720051ae933..000000000000
--- a/include/linux/mfd/ab5500/ab5500.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (C) ST-Ericsson 2011
- *
- * License Terms: GNU General Public License v2
- */
-#ifndef MFD_AB5500_H
-#define MFD_AB5500_H
-
-#include <linux/device.h>
-
-enum ab5500_devid {
-	AB5500_DEVID_ADC,
-	AB5500_DEVID_LEDS,
-	AB5500_DEVID_POWER,
-	AB5500_DEVID_REGULATORS,
-	AB5500_DEVID_SIM,
-	AB5500_DEVID_RTC,
-	AB5500_DEVID_CHARGER,
-	AB5500_DEVID_FUELGAUGE,
-	AB5500_DEVID_VIBRATOR,
-	AB5500_DEVID_CODEC,
-	AB5500_DEVID_USB,
-	AB5500_DEVID_OTP,
-	AB5500_DEVID_VIDEO,
-	AB5500_DEVID_DBIECI,
-	AB5500_DEVID_ONSWA,
-	AB5500_NUM_DEVICES,
-};
-
-enum ab5500_banks {
-	AB5500_BANK_VIT_IO_I2C_CLK_TST_OTP = 0,
-	AB5500_BANK_VDDDIG_IO_I2C_CLK_TST = 1,
-	AB5500_BANK_VDENC = 2,
-	AB5500_BANK_SIM_USBSIM  = 3,
-	AB5500_BANK_LED = 4,
-	AB5500_BANK_ADC  = 5,
-	AB5500_BANK_RTC  = 6,
-	AB5500_BANK_STARTUP  = 7,
-	AB5500_BANK_DBI_ECI  = 8,
-	AB5500_BANK_CHG  = 9,
-	AB5500_BANK_FG_BATTCOM_ACC = 10,
-	AB5500_BANK_USB = 11,
-	AB5500_BANK_IT = 12,
-	AB5500_BANK_VIBRA = 13,
-	AB5500_BANK_AUDIO_HEADSETUSB = 14,
-	AB5500_NUM_BANKS = 15,
-};
-
-enum ab5500_banks_addr {
-	AB5500_ADDR_VIT_IO_I2C_CLK_TST_OTP = 0x4A,
-	AB5500_ADDR_VDDDIG_IO_I2C_CLK_TST = 0x4B,
-	AB5500_ADDR_VDENC = 0x06,
-	AB5500_ADDR_SIM_USBSIM  = 0x04,
-	AB5500_ADDR_LED = 0x10,
-	AB5500_ADDR_ADC  = 0x0A,
-	AB5500_ADDR_RTC  = 0x0F,
-	AB5500_ADDR_STARTUP  = 0x03,
-	AB5500_ADDR_DBI_ECI  = 0x07,
-	AB5500_ADDR_CHG  = 0x0B,
-	AB5500_ADDR_FG_BATTCOM_ACC = 0x0C,
-	AB5500_ADDR_USB = 0x05,
-	AB5500_ADDR_IT = 0x0E,
-	AB5500_ADDR_VIBRA = 0x02,
-	AB5500_ADDR_AUDIO_HEADSETUSB = 0x0D,
-};
-
-/*
- * Interrupt register offsets
- * Bank : 0x0E
- */
-#define AB5500_IT_SOURCE0_REG		0x20
-#define AB5500_IT_SOURCE1_REG		0x21
-#define AB5500_IT_SOURCE2_REG		0x22
-#define AB5500_IT_SOURCE3_REG		0x23
-#define AB5500_IT_SOURCE4_REG		0x24
-#define AB5500_IT_SOURCE5_REG		0x25
-#define AB5500_IT_SOURCE6_REG		0x26
-#define AB5500_IT_SOURCE7_REG		0x27
-#define AB5500_IT_SOURCE8_REG		0x28
-#define AB5500_IT_SOURCE9_REG		0x29
-#define AB5500_IT_SOURCE10_REG		0x2A
-#define AB5500_IT_SOURCE11_REG		0x2B
-#define AB5500_IT_SOURCE12_REG		0x2C
-#define AB5500_IT_SOURCE13_REG		0x2D
-#define AB5500_IT_SOURCE14_REG		0x2E
-#define AB5500_IT_SOURCE15_REG		0x2F
-#define AB5500_IT_SOURCE16_REG		0x30
-#define AB5500_IT_SOURCE17_REG		0x31
-#define AB5500_IT_SOURCE18_REG		0x32
-#define AB5500_IT_SOURCE19_REG		0x33
-#define AB5500_IT_SOURCE20_REG		0x34
-#define AB5500_IT_SOURCE21_REG		0x35
-#define AB5500_IT_SOURCE22_REG		0x36
-#define AB5500_IT_SOURCE23_REG		0x37
-
-#define AB5500_NUM_IRQ_REGS		23
-
-/**
- * struct ab5500
- * @access_mutex: lock out concurrent accesses to the AB registers
- * @dev: a pointer to the device struct for this chip driver
- * @ab5500_irq: the analog baseband irq
- * @irq_base: the platform configuration irq base for subdevices
- * @chip_name: name of this chip variant
- * @chip_id: 8 bit chip ID for this chip variant
- * @irq_lock: a lock to protect the mask
- * @abb_events: a local bit mask of the prcmu wakeup events
- * @event_mask: a local copy of the mask event registers
- * @last_event_mask: a copy of the last event_mask written to hardware
- * @startup_events: a copy of the first reading of the event registers
- * @startup_events_read: whether the first events have been read
- */
-struct ab5500 {
-	struct mutex access_mutex;
-	struct device *dev;
-	unsigned int ab5500_irq;
-	unsigned int irq_base;
-	char chip_name[32];
-	u8 chip_id;
-	struct mutex irq_lock;
-	u32 abb_events;
-	u8 mask[AB5500_NUM_IRQ_REGS];
-	u8 oldmask[AB5500_NUM_IRQ_REGS];
-	u8 startup_events[AB5500_NUM_IRQ_REGS];
-	bool startup_events_read;
-#ifdef CONFIG_DEBUG_FS
-	unsigned int debug_bank;
-	unsigned int debug_address;
-#endif
-};
-
-struct ab5500_platform_data {
-	struct {unsigned int base; unsigned int count; } irq;
-	void *dev_data[AB5500_NUM_DEVICES];
-	struct abx500_init_settings *init_settings;
-	unsigned int init_settings_sz;
-	bool pm_power_off;
-};
-
-#endif /* MFD_AB5500_H */
diff --git a/include/linux/mfd/ab8500.h b/include/linux/mfd/ab8500.h
deleted file mode 100644
index 838c6b487cc5..000000000000
--- a/include/linux/mfd/ab8500.h
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright (C) ST-Ericsson SA 2010
- *
- * License Terms: GNU General Public License v2
- * Author: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
- */
-#ifndef MFD_AB8500_H
-#define MFD_AB8500_H
-
-#include <linux/device.h>
-
-/*
- * AB8500 bank addresses
- */
-#define AB8500_SYS_CTRL1_BLOCK	0x1
-#define AB8500_SYS_CTRL2_BLOCK	0x2
-#define AB8500_REGU_CTRL1	0x3
-#define AB8500_REGU_CTRL2	0x4
-#define AB8500_USB		0x5
-#define AB8500_TVOUT		0x6
-#define AB8500_DBI		0x7
-#define AB8500_ECI_AV_ACC	0x8
-#define AB8500_RESERVED		0x9
-#define AB8500_GPADC		0xA
-#define AB8500_CHARGER		0xB
-#define AB8500_GAS_GAUGE	0xC
-#define AB8500_AUDIO		0xD
-#define AB8500_INTERRUPT	0xE
-#define AB8500_RTC		0xF
-#define AB8500_MISC		0x10
-#define AB8500_DEVELOPMENT	0x11
-#define AB8500_DEBUG		0x12
-#define AB8500_PROD_TEST	0x13
-#define AB8500_OTP_EMUL		0x15
-
-/*
- * Interrupts
- */
-
-#define AB8500_INT_MAIN_EXT_CH_NOT_OK	0
-#define AB8500_INT_UN_PLUG_TV_DET	1
-#define AB8500_INT_PLUG_TV_DET		2
-#define AB8500_INT_TEMP_WARM		3
-#define AB8500_INT_PON_KEY2DB_F		4
-#define AB8500_INT_PON_KEY2DB_R		5
-#define AB8500_INT_PON_KEY1DB_F		6
-#define AB8500_INT_PON_KEY1DB_R		7
-#define AB8500_INT_BATT_OVV		8
-#define AB8500_INT_MAIN_CH_UNPLUG_DET	10
-#define AB8500_INT_MAIN_CH_PLUG_DET	11
-#define AB8500_INT_USB_ID_DET_F		12
-#define AB8500_INT_USB_ID_DET_R		13
-#define AB8500_INT_VBUS_DET_F		14
-#define AB8500_INT_VBUS_DET_R		15
-#define AB8500_INT_VBUS_CH_DROP_END	16
-#define AB8500_INT_RTC_60S		17
-#define AB8500_INT_RTC_ALARM		18
-#define AB8500_INT_BAT_CTRL_INDB	20
-#define AB8500_INT_CH_WD_EXP		21
-#define AB8500_INT_VBUS_OVV		22
-#define AB8500_INT_MAIN_CH_DROP_END	23
-#define AB8500_INT_CCN_CONV_ACC		24
-#define AB8500_INT_INT_AUD		25
-#define AB8500_INT_CCEOC		26
-#define AB8500_INT_CC_INT_CALIB		27
-#define AB8500_INT_LOW_BAT_F		28
-#define AB8500_INT_LOW_BAT_R		29
-#define AB8500_INT_BUP_CHG_NOT_OK	30
-#define AB8500_INT_BUP_CHG_OK		31
-#define AB8500_INT_GP_HW_ADC_CONV_END	32
-#define AB8500_INT_ACC_DETECT_1DB_F	33
-#define AB8500_INT_ACC_DETECT_1DB_R	34
-#define AB8500_INT_ACC_DETECT_22DB_F	35
-#define AB8500_INT_ACC_DETECT_22DB_R	36
-#define AB8500_INT_ACC_DETECT_21DB_F	37
-#define AB8500_INT_ACC_DETECT_21DB_R	38
-#define AB8500_INT_GP_SW_ADC_CONV_END	39
-#define AB8500_INT_GPIO6R		40
-#define AB8500_INT_GPIO7R		41
-#define AB8500_INT_GPIO8R		42
-#define AB8500_INT_GPIO9R		43
-#define AB8500_INT_GPIO10R		44
-#define AB8500_INT_GPIO11R		45
-#define AB8500_INT_GPIO12R		46
-#define AB8500_INT_GPIO13R		47
-#define AB8500_INT_GPIO24R		48
-#define AB8500_INT_GPIO25R		49
-#define AB8500_INT_GPIO36R		50
-#define AB8500_INT_GPIO37R		51
-#define AB8500_INT_GPIO38R		52
-#define AB8500_INT_GPIO39R		53
-#define AB8500_INT_GPIO40R		54
-#define AB8500_INT_GPIO41R		55
-#define AB8500_INT_GPIO6F		56
-#define AB8500_INT_GPIO7F		57
-#define AB8500_INT_GPIO8F		58
-#define AB8500_INT_GPIO9F		59
-#define AB8500_INT_GPIO10F		60
-#define AB8500_INT_GPIO11F		61
-#define AB8500_INT_GPIO12F		62
-#define AB8500_INT_GPIO13F		63
-#define AB8500_INT_GPIO24F		64
-#define AB8500_INT_GPIO25F		65
-#define AB8500_INT_GPIO36F		66
-#define AB8500_INT_GPIO37F		67
-#define AB8500_INT_GPIO38F		68
-#define AB8500_INT_GPIO39F		69
-#define AB8500_INT_GPIO40F		70
-#define AB8500_INT_GPIO41F		71
-#define AB8500_INT_ADP_SOURCE_ERROR	72
-#define AB8500_INT_ADP_SINK_ERROR	73
-#define AB8500_INT_ADP_PROBE_PLUG	74
-#define AB8500_INT_ADP_PROBE_UNPLUG	75
-#define AB8500_INT_ADP_SENSE_OFF	76
-#define AB8500_INT_USB_PHY_POWER_ERR	78
-#define AB8500_INT_USB_LINK_STATUS	79
-#define AB8500_INT_BTEMP_LOW		80
-#define AB8500_INT_BTEMP_LOW_MEDIUM	81
-#define AB8500_INT_BTEMP_MEDIUM_HIGH	82
-#define AB8500_INT_BTEMP_HIGH		83
-#define AB8500_INT_USB_CHARGER_NOT_OK	89
-#define AB8500_INT_ID_WAKEUP_R		90
-#define AB8500_INT_ID_DET_R1R		92
-#define AB8500_INT_ID_DET_R2R		93
-#define AB8500_INT_ID_DET_R3R		94
-#define AB8500_INT_ID_DET_R4R		95
-#define AB8500_INT_ID_WAKEUP_F		96
-#define AB8500_INT_ID_DET_R1F		98
-#define AB8500_INT_ID_DET_R2F		99
-#define AB8500_INT_ID_DET_R3F		100
-#define AB8500_INT_ID_DET_R4F		101
-#define AB8500_INT_USB_CHG_DET_DONE	102
-#define AB8500_INT_USB_CH_TH_PROT_F	104
-#define AB8500_INT_USB_CH_TH_PROT_R    105
-#define AB8500_INT_MAIN_CH_TH_PROT_F   106
-#define AB8500_INT_MAIN_CH_TH_PROT_R	107
-#define AB8500_INT_USB_CHARGER_NOT_OKF	111
-
-#define AB8500_NR_IRQS			112
-#define AB8500_NUM_IRQ_REGS		14
-
-/**
- * struct ab8500 - ab8500 internal structure
- * @dev: parent device
- * @lock: read/write operations lock
- * @irq_lock: genirq bus lock
- * @irq: irq line
- * @chip_id: chip revision id
- * @write: register write
- * @read: register read
- * @rx_buf: rx buf for SPI
- * @tx_buf: tx buf for SPI
- * @mask: cache of IRQ regs for bus lock
- * @oldmask: cache of previous IRQ regs for bus lock
- */
-struct ab8500 {
-	struct device	*dev;
-	struct mutex	lock;
-	struct mutex	irq_lock;
-
-	int		irq_base;
-	int		irq;
-	u8		chip_id;
-
-	int (*write) (struct ab8500 *a8500, u16 addr, u8 data);
-	int (*read) (struct ab8500 *a8500, u16 addr);
-
-	unsigned long	tx_buf[4];
-	unsigned long	rx_buf[4];
-
-	u8 mask[AB8500_NUM_IRQ_REGS];
-	u8 oldmask[AB8500_NUM_IRQ_REGS];
-};
-
-struct regulator_reg_init;
-struct regulator_init_data;
-struct ab8500_gpio_platform_data;
-
-/**
- * struct ab8500_platform_data - AB8500 platform data
- * @irq_base: start of AB8500 IRQs, AB8500_NR_IRQS will be used
- * @init: board-specific initialization after detection of ab8500
- * @num_regulator_reg_init: number of regulator init registers
- * @regulator_reg_init: regulator init registers
- * @num_regulator: number of regulators
- * @regulator: machine-specific constraints for regulators
- */
-struct ab8500_platform_data {
-	int irq_base;
-	void (*init) (struct ab8500 *);
-	int num_regulator_reg_init;
-	struct ab8500_regulator_reg_init *regulator_reg_init;
-	int num_regulator;
-	struct regulator_init_data *regulator;
-	struct ab8500_gpio_platform_data *gpio;
-};
-
-extern int __devinit ab8500_init(struct ab8500 *ab8500);
-extern int __devexit ab8500_exit(struct ab8500 *ab8500);
-
-#endif /* MFD_AB8500_H */
diff --git a/include/linux/mfd/ab8500/gpadc.h b/include/linux/mfd/ab8500/gpadc.h
deleted file mode 100644
index 252966769d93..000000000000
--- a/include/linux/mfd/ab8500/gpadc.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2010 ST-Ericsson SA
- * Licensed under GPLv2.
- *
- * Author: Arun R Murthy <arun.murthy@stericsson.com>
- * Author: Daniel Willerud <daniel.willerud@stericsson.com>
- */
-
-#ifndef	_AB8500_GPADC_H
-#define _AB8500_GPADC_H
-
-/* GPADC source: From datasheet(ADCSwSel[4:0] in GPADCCtrl2) */
-#define BAT_CTRL	0x01
-#define BTEMP_BALL	0x02
-#define MAIN_CHARGER_V	0x03
-#define ACC_DETECT1	0x04
-#define ACC_DETECT2	0x05
-#define ADC_AUX1	0x06
-#define ADC_AUX2	0x07
-#define MAIN_BAT_V	0x08
-#define VBUS_V		0x09
-#define MAIN_CHARGER_C	0x0A
-#define USB_CHARGER_C	0x0B
-#define BK_BAT_V	0x0C
-#define DIE_TEMP	0x0D
-
-struct ab8500_gpadc;
-
-struct ab8500_gpadc *ab8500_gpadc_get(char *name);
-int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 channel);
-int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel);
-int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc,
-    u8 channel, int ad_value);
-
-#endif /* _AB8500_GPADC_H */
diff --git a/include/linux/mfd/ab8500/gpio.h b/include/linux/mfd/ab8500/gpio.h
deleted file mode 100644
index 488a8c920a29..000000000000
--- a/include/linux/mfd/ab8500/gpio.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright ST-Ericsson 2010.
- *
- * Author: Bibek Basu <bibek.basu@stericsson.com>
- * Licensed under GPLv2.
- */
-
-#ifndef _AB8500_GPIO_H
-#define _AB8500_GPIO_H
-
-/*
- * Platform data to register a block: only the initial gpio/irq number.
- */
-
-struct ab8500_gpio_platform_data {
-	int gpio_base;
-	u32 irq_base;
-	u8  config_reg[7];
-};
-
-#endif /* _AB8500_GPIO_H */
diff --git a/include/linux/mfd/ab8500/sysctrl.h b/include/linux/mfd/ab8500/sysctrl.h
deleted file mode 100644
index 10da0291f8f8..000000000000
--- a/include/linux/mfd/ab8500/sysctrl.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright (C) ST-Ericsson SA 2010
- * Author: Mattias Nilsson <mattias.i.nilsson@stericsson.com> for ST Ericsson.
- * License terms: GNU General Public License (GPL) version 2
- */
-#ifndef __AB8500_SYSCTRL_H
-#define __AB8500_SYSCTRL_H
-
-#include <linux/bitops.h>
-
-#ifdef CONFIG_AB8500_CORE
-
-int ab8500_sysctrl_read(u16 reg, u8 *value);
-int ab8500_sysctrl_write(u16 reg, u8 mask, u8 value);
-
-#else
-
-static inline int ab8500_sysctrl_read(u16 reg, u8 *value)
-{
-	return 0;
-}
-
-static inline int ab8500_sysctrl_write(u16 reg, u8 mask, u8 value)
-{
-	return 0;
-}
-
-#endif /* CONFIG_AB8500_CORE */
-
-static inline int ab8500_sysctrl_set(u16 reg, u8 bits)
-{
-	return ab8500_sysctrl_write(reg, bits, bits);
-}
-
-static inline int ab8500_sysctrl_clear(u16 reg, u8 bits)
-{
-	return ab8500_sysctrl_write(reg, bits, 0);
-}
-
-/* Registers */
-#define AB8500_TURNONSTATUS		0x100
-#define AB8500_RESETSTATUS		0x101
-#define AB8500_PONKEY1PRESSSTATUS	0x102
-#define AB8500_SYSCLKREQSTATUS		0x142
-#define AB8500_STW4500CTRL1		0x180
-#define AB8500_STW4500CTRL2		0x181
-#define AB8500_STW4500CTRL3		0x200
-#define AB8500_MAINWDOGCTRL		0x201
-#define AB8500_MAINWDOGTIMER		0x202
-#define AB8500_LOWBAT			0x203
-#define AB8500_BATTOK			0x204
-#define AB8500_SYSCLKTIMER		0x205
-#define AB8500_SMPSCLKCTRL		0x206
-#define AB8500_SMPSCLKSEL1		0x207
-#define AB8500_SMPSCLKSEL2		0x208
-#define AB8500_SMPSCLKSEL3		0x209
-#define AB8500_SYSULPCLKCONF		0x20A
-#define AB8500_SYSULPCLKCTRL1		0x20B
-#define AB8500_SYSCLKCTRL		0x20C
-#define AB8500_SYSCLKREQ1VALID		0x20D
-#define AB8500_SYSTEMCTRLSUP		0x20F
-#define AB8500_SYSCLKREQ1RFCLKBUF	0x210
-#define AB8500_SYSCLKREQ2RFCLKBUF	0x211
-#define AB8500_SYSCLKREQ3RFCLKBUF	0x212
-#define AB8500_SYSCLKREQ4RFCLKBUF	0x213
-#define AB8500_SYSCLKREQ5RFCLKBUF	0x214
-#define AB8500_SYSCLKREQ6RFCLKBUF	0x215
-#define AB8500_SYSCLKREQ7RFCLKBUF	0x216
-#define AB8500_SYSCLKREQ8RFCLKBUF	0x217
-#define AB8500_DITHERCLKCTRL		0x220
-#define AB8500_SWATCTRL			0x230
-#define AB8500_HIQCLKCTRL		0x232
-#define AB8500_VSIMSYSCLKCTRL		0x233
-
-/* Bits */
-#define AB8500_TURNONSTATUS_PORNVBAT BIT(0)
-#define AB8500_TURNONSTATUS_PONKEY1DBF BIT(1)
-#define AB8500_TURNONSTATUS_PONKEY2DBF BIT(2)
-#define AB8500_TURNONSTATUS_RTCALARM BIT(3)
-#define AB8500_TURNONSTATUS_MAINCHDET BIT(4)
-#define AB8500_TURNONSTATUS_VBUSDET BIT(5)
-#define AB8500_TURNONSTATUS_USBIDDETECT BIT(6)
-
-#define AB8500_RESETSTATUS_RESETN4500NSTATUS BIT(0)
-#define AB8500_RESETSTATUS_SWRESETN4500NSTATUS BIT(2)
-
-#define AB8500_PONKEY1PRESSSTATUS_PONKEY1PRESSTIME_MASK 0x7F
-#define AB8500_PONKEY1PRESSSTATUS_PONKEY1PRESSTIME_SHIFT 0
-
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ1STATUS BIT(0)
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ2STATUS BIT(1)
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ3STATUS BIT(2)
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ4STATUS BIT(3)
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ5STATUS BIT(4)
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ6STATUS BIT(5)
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ7STATUS BIT(6)
-#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ8STATUS BIT(7)
-
-#define AB8500_STW4500CTRL1_SWOFF BIT(0)
-#define AB8500_STW4500CTRL1_SWRESET4500N BIT(1)
-#define AB8500_STW4500CTRL1_THDB8500SWOFF BIT(2)
-
-#define AB8500_STW4500CTRL2_RESETNVAUX1VALID BIT(0)
-#define AB8500_STW4500CTRL2_RESETNVAUX2VALID BIT(1)
-#define AB8500_STW4500CTRL2_RESETNVAUX3VALID BIT(2)
-#define AB8500_STW4500CTRL2_RESETNVMODVALID BIT(3)
-#define AB8500_STW4500CTRL2_RESETNVEXTSUPPLY1VALID BIT(4)
-#define AB8500_STW4500CTRL2_RESETNVEXTSUPPLY2VALID BIT(5)
-#define AB8500_STW4500CTRL2_RESETNVEXTSUPPLY3VALID BIT(6)
-#define AB8500_STW4500CTRL2_RESETNVSMPS1VALID BIT(7)
-
-#define AB8500_STW4500CTRL3_CLK32KOUT2DIS BIT(0)
-#define AB8500_STW4500CTRL3_RESETAUDN BIT(1)
-#define AB8500_STW4500CTRL3_RESETDENCN BIT(2)
-#define AB8500_STW4500CTRL3_THSDENA BIT(3)
-
-#define AB8500_MAINWDOGCTRL_MAINWDOGENA BIT(0)
-#define AB8500_MAINWDOGCTRL_MAINWDOGKICK BIT(1)
-#define AB8500_MAINWDOGCTRL_WDEXPTURNONVALID BIT(4)
-
-#define AB8500_MAINWDOGTIMER_MAINWDOGTIMER_MASK 0x7F
-#define AB8500_MAINWDOGTIMER_MAINWDOGTIMER_SHIFT 0
-
-#define AB8500_LOWBAT_LOWBATENA BIT(0)
-#define AB8500_LOWBAT_LOWBAT_MASK 0x7E
-#define AB8500_LOWBAT_LOWBAT_SHIFT 1
-
-#define AB8500_BATTOK_BATTOKSEL0THF_MASK 0x0F
-#define AB8500_BATTOK_BATTOKSEL0THF_SHIFT 0
-#define AB8500_BATTOK_BATTOKSEL1THF_MASK 0xF0
-#define AB8500_BATTOK_BATTOKSEL1THF_SHIFT 4
-
-#define AB8500_SYSCLKTIMER_SYSCLKTIMER_MASK 0x0F
-#define AB8500_SYSCLKTIMER_SYSCLKTIMER_SHIFT 0
-#define AB8500_SYSCLKTIMER_SYSCLKTIMERADJ_MASK 0xF0
-#define AB8500_SYSCLKTIMER_SYSCLKTIMERADJ_SHIFT 4
-
-#define AB8500_SMPSCLKCTRL_SMPSCLKINTSEL_MASK 0x03
-#define AB8500_SMPSCLKCTRL_SMPSCLKINTSEL_SHIFT 0
-#define AB8500_SMPSCLKCTRL_3M2CLKINTENA BIT(2)
-
-#define AB8500_SMPSCLKSEL1_VARMCLKSEL_MASK 0x07
-#define AB8500_SMPSCLKSEL1_VARMCLKSEL_SHIFT 0
-#define AB8500_SMPSCLKSEL1_VAPECLKSEL_MASK 0x38
-#define AB8500_SMPSCLKSEL1_VAPECLKSEL_SHIFT 3
-
-#define AB8500_SMPSCLKSEL2_VMODCLKSEL_MASK 0x07
-#define AB8500_SMPSCLKSEL2_VMODCLKSEL_SHIFT 0
-#define AB8500_SMPSCLKSEL2_VSMPS1CLKSEL_MASK 0x38
-#define AB8500_SMPSCLKSEL2_VSMPS1CLKSEL_SHIFT 3
-
-#define AB8500_SMPSCLKSEL3_VSMPS2CLKSEL_MASK 0x07
-#define AB8500_SMPSCLKSEL3_VSMPS2CLKSEL_SHIFT 0
-#define AB8500_SMPSCLKSEL3_VSMPS3CLKSEL_MASK 0x38
-#define AB8500_SMPSCLKSEL3_VSMPS3CLKSEL_SHIFT 3
-
-#define AB8500_SYSULPCLKCONF_ULPCLKCONF_MASK 0x03
-#define AB8500_SYSULPCLKCONF_ULPCLKCONF_SHIFT 0
-#define AB8500_SYSULPCLKCONF_CLK27MHZSTRE BIT(2)
-#define AB8500_SYSULPCLKCONF_TVOUTCLKDELN BIT(3)
-#define AB8500_SYSULPCLKCONF_TVOUTCLKINV BIT(4)
-#define AB8500_SYSULPCLKCONF_ULPCLKSTRE BIT(5)
-#define AB8500_SYSULPCLKCONF_CLK27MHZBUFENA BIT(6)
-#define AB8500_SYSULPCLKCONF_CLK27MHZPDENA BIT(7)
-
-#define AB8500_SYSULPCLKCTRL1_SYSULPCLKINTSEL_MASK 0x03
-#define AB8500_SYSULPCLKCTRL1_SYSULPCLKINTSEL_SHIFT 0
-#define AB8500_SYSULPCLKCTRL1_ULPCLKREQ BIT(2)
-#define AB8500_SYSULPCLKCTRL1_4500SYSCLKREQ BIT(3)
-#define AB8500_SYSULPCLKCTRL1_AUDIOCLKENA BIT(4)
-#define AB8500_SYSULPCLKCTRL1_SYSCLKBUF2REQ BIT(5)
-#define AB8500_SYSULPCLKCTRL1_SYSCLKBUF3REQ BIT(6)
-#define AB8500_SYSULPCLKCTRL1_SYSCLKBUF4REQ BIT(7)
-
-#define AB8500_SYSCLKCTRL_TVOUTPLLENA BIT(0)
-#define AB8500_SYSCLKCTRL_TVOUTCLKENA BIT(1)
-#define AB8500_SYSCLKCTRL_USBCLKENA BIT(2)
-
-#define AB8500_SYSCLKREQ1VALID_SYSCLKREQ1VALID BIT(0)
-#define AB8500_SYSCLKREQ1VALID_ULPCLKREQ1VALID BIT(1)
-#define AB8500_SYSCLKREQ1VALID_USBSYSCLKREQ1VALID BIT(2)
-
-#define AB8500_SYSTEMCTRLSUP_EXTSUP12LPNCLKSEL_MASK 0x03
-#define AB8500_SYSTEMCTRLSUP_EXTSUP12LPNCLKSEL_SHIFT 0
-#define AB8500_SYSTEMCTRLSUP_EXTSUP3LPNCLKSEL_MASK 0x0C
-#define AB8500_SYSTEMCTRLSUP_EXTSUP3LPNCLKSEL_SHIFT 2
-#define AB8500_SYSTEMCTRLSUP_INTDB8500NOD BIT(4)
-
-#define AB8500_SYSCLKREQ1RFCLKBUF_SYSCLKREQ1RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ1RFCLKBUF_SYSCLKREQ1RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ1RFCLKBUF_SYSCLKREQ1RFCLKBUF4 BIT(4)
-
-#define AB8500_SYSCLKREQ2RFCLKBUF_SYSCLKREQ2RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ2RFCLKBUF_SYSCLKREQ2RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ2RFCLKBUF_SYSCLKREQ2RFCLKBUF4 BIT(4)
-
-#define AB8500_SYSCLKREQ3RFCLKBUF_SYSCLKREQ3RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ3RFCLKBUF_SYSCLKREQ3RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ3RFCLKBUF_SYSCLKREQ3RFCLKBUF4 BIT(4)
-
-#define AB8500_SYSCLKREQ4RFCLKBUF_SYSCLKREQ4RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ4RFCLKBUF_SYSCLKREQ4RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ4RFCLKBUF_SYSCLKREQ4RFCLKBUF4 BIT(4)
-
-#define AB8500_SYSCLKREQ5RFCLKBUF_SYSCLKREQ5RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ5RFCLKBUF_SYSCLKREQ5RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ5RFCLKBUF_SYSCLKREQ5RFCLKBUF4 BIT(4)
-
-#define AB8500_SYSCLKREQ6RFCLKBUF_SYSCLKREQ6RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ6RFCLKBUF_SYSCLKREQ6RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ6RFCLKBUF_SYSCLKREQ6RFCLKBUF4 BIT(4)
-
-#define AB8500_SYSCLKREQ7RFCLKBUF_SYSCLKREQ7RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ7RFCLKBUF_SYSCLKREQ7RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ7RFCLKBUF_SYSCLKREQ7RFCLKBUF4 BIT(4)
-
-#define AB8500_SYSCLKREQ8RFCLKBUF_SYSCLKREQ8RFCLKBUF2 BIT(2)
-#define AB8500_SYSCLKREQ8RFCLKBUF_SYSCLKREQ8RFCLKBUF3 BIT(3)
-#define AB8500_SYSCLKREQ8RFCLKBUF_SYSCLKREQ8RFCLKBUF4 BIT(4)
-
-#define AB8500_DITHERCLKCTRL_VARMDITHERENA BIT(0)
-#define AB8500_DITHERCLKCTRL_VSMPS3DITHERENA BIT(1)
-#define AB8500_DITHERCLKCTRL_VSMPS1DITHERENA BIT(2)
-#define AB8500_DITHERCLKCTRL_VSMPS2DITHERENA BIT(3)
-#define AB8500_DITHERCLKCTRL_VMODDITHERENA BIT(4)
-#define AB8500_DITHERCLKCTRL_VAPEDITHERENA BIT(5)
-#define AB8500_DITHERCLKCTRL_DITHERDEL_MASK 0xC0
-#define AB8500_DITHERCLKCTRL_DITHERDEL_SHIFT 6
-
-#define AB8500_SWATCTRL_UPDATERF BIT(0)
-#define AB8500_SWATCTRL_SWATENABLE BIT(1)
-#define AB8500_SWATCTRL_RFOFFTIMER_MASK 0x1C
-#define AB8500_SWATCTRL_RFOFFTIMER_SHIFT 2
-#define AB8500_SWATCTRL_SWATBIT5 BIT(6)
-
-#define AB8500_HIQCLKCTRL_SYSCLKREQ1HIQENAVALID BIT(0)
-#define AB8500_HIQCLKCTRL_SYSCLKREQ2HIQENAVALID BIT(1)
-#define AB8500_HIQCLKCTRL_SYSCLKREQ3HIQENAVALID BIT(2)
-#define AB8500_HIQCLKCTRL_SYSCLKREQ4HIQENAVALID BIT(3)
-#define AB8500_HIQCLKCTRL_SYSCLKREQ5HIQENAVALID BIT(4)
-#define AB8500_HIQCLKCTRL_SYSCLKREQ6HIQENAVALID BIT(5)
-#define AB8500_HIQCLKCTRL_SYSCLKREQ7HIQENAVALID BIT(6)
-#define AB8500_HIQCLKCTRL_SYSCLKREQ8HIQENAVALID BIT(7)
-
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ1VALID BIT(0)
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ2VALID BIT(1)
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ3VALID BIT(2)
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ4VALID BIT(3)
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ5VALID BIT(4)
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ6VALID BIT(5)
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ7VALID BIT(6)
-#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ8VALID BIT(7)
-
-#endif /* __AB8500_SYSCTRL_H */
diff --git a/include/linux/mfd/abx500/ab5500.h b/include/linux/mfd/abx500/ab5500.h
new file mode 100644
index 000000000000..a720051ae933
--- /dev/null
+++ b/include/linux/mfd/abx500/ab5500.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) ST-Ericsson 2011
+ *
+ * License Terms: GNU General Public License v2
+ */
+#ifndef MFD_AB5500_H
+#define MFD_AB5500_H
+
+#include <linux/device.h>
+
+enum ab5500_devid {
+	AB5500_DEVID_ADC,
+	AB5500_DEVID_LEDS,
+	AB5500_DEVID_POWER,
+	AB5500_DEVID_REGULATORS,
+	AB5500_DEVID_SIM,
+	AB5500_DEVID_RTC,
+	AB5500_DEVID_CHARGER,
+	AB5500_DEVID_FUELGAUGE,
+	AB5500_DEVID_VIBRATOR,
+	AB5500_DEVID_CODEC,
+	AB5500_DEVID_USB,
+	AB5500_DEVID_OTP,
+	AB5500_DEVID_VIDEO,
+	AB5500_DEVID_DBIECI,
+	AB5500_DEVID_ONSWA,
+	AB5500_NUM_DEVICES,
+};
+
+enum ab5500_banks {
+	AB5500_BANK_VIT_IO_I2C_CLK_TST_OTP = 0,
+	AB5500_BANK_VDDDIG_IO_I2C_CLK_TST = 1,
+	AB5500_BANK_VDENC = 2,
+	AB5500_BANK_SIM_USBSIM  = 3,
+	AB5500_BANK_LED = 4,
+	AB5500_BANK_ADC  = 5,
+	AB5500_BANK_RTC  = 6,
+	AB5500_BANK_STARTUP  = 7,
+	AB5500_BANK_DBI_ECI  = 8,
+	AB5500_BANK_CHG  = 9,
+	AB5500_BANK_FG_BATTCOM_ACC = 10,
+	AB5500_BANK_USB = 11,
+	AB5500_BANK_IT = 12,
+	AB5500_BANK_VIBRA = 13,
+	AB5500_BANK_AUDIO_HEADSETUSB = 14,
+	AB5500_NUM_BANKS = 15,
+};
+
+enum ab5500_banks_addr {
+	AB5500_ADDR_VIT_IO_I2C_CLK_TST_OTP = 0x4A,
+	AB5500_ADDR_VDDDIG_IO_I2C_CLK_TST = 0x4B,
+	AB5500_ADDR_VDENC = 0x06,
+	AB5500_ADDR_SIM_USBSIM  = 0x04,
+	AB5500_ADDR_LED = 0x10,
+	AB5500_ADDR_ADC  = 0x0A,
+	AB5500_ADDR_RTC  = 0x0F,
+	AB5500_ADDR_STARTUP  = 0x03,
+	AB5500_ADDR_DBI_ECI  = 0x07,
+	AB5500_ADDR_CHG  = 0x0B,
+	AB5500_ADDR_FG_BATTCOM_ACC = 0x0C,
+	AB5500_ADDR_USB = 0x05,
+	AB5500_ADDR_IT = 0x0E,
+	AB5500_ADDR_VIBRA = 0x02,
+	AB5500_ADDR_AUDIO_HEADSETUSB = 0x0D,
+};
+
+/*
+ * Interrupt register offsets
+ * Bank : 0x0E
+ */
+#define AB5500_IT_SOURCE0_REG		0x20
+#define AB5500_IT_SOURCE1_REG		0x21
+#define AB5500_IT_SOURCE2_REG		0x22
+#define AB5500_IT_SOURCE3_REG		0x23
+#define AB5500_IT_SOURCE4_REG		0x24
+#define AB5500_IT_SOURCE5_REG		0x25
+#define AB5500_IT_SOURCE6_REG		0x26
+#define AB5500_IT_SOURCE7_REG		0x27
+#define AB5500_IT_SOURCE8_REG		0x28
+#define AB5500_IT_SOURCE9_REG		0x29
+#define AB5500_IT_SOURCE10_REG		0x2A
+#define AB5500_IT_SOURCE11_REG		0x2B
+#define AB5500_IT_SOURCE12_REG		0x2C
+#define AB5500_IT_SOURCE13_REG		0x2D
+#define AB5500_IT_SOURCE14_REG		0x2E
+#define AB5500_IT_SOURCE15_REG		0x2F
+#define AB5500_IT_SOURCE16_REG		0x30
+#define AB5500_IT_SOURCE17_REG		0x31
+#define AB5500_IT_SOURCE18_REG		0x32
+#define AB5500_IT_SOURCE19_REG		0x33
+#define AB5500_IT_SOURCE20_REG		0x34
+#define AB5500_IT_SOURCE21_REG		0x35
+#define AB5500_IT_SOURCE22_REG		0x36
+#define AB5500_IT_SOURCE23_REG		0x37
+
+#define AB5500_NUM_IRQ_REGS		23
+
+/**
+ * struct ab5500
+ * @access_mutex: lock out concurrent accesses to the AB registers
+ * @dev: a pointer to the device struct for this chip driver
+ * @ab5500_irq: the analog baseband irq
+ * @irq_base: the platform configuration irq base for subdevices
+ * @chip_name: name of this chip variant
+ * @chip_id: 8 bit chip ID for this chip variant
+ * @irq_lock: a lock to protect the mask
+ * @abb_events: a local bit mask of the prcmu wakeup events
+ * @event_mask: a local copy of the mask event registers
+ * @last_event_mask: a copy of the last event_mask written to hardware
+ * @startup_events: a copy of the first reading of the event registers
+ * @startup_events_read: whether the first events have been read
+ */
+struct ab5500 {
+	struct mutex access_mutex;
+	struct device *dev;
+	unsigned int ab5500_irq;
+	unsigned int irq_base;
+	char chip_name[32];
+	u8 chip_id;
+	struct mutex irq_lock;
+	u32 abb_events;
+	u8 mask[AB5500_NUM_IRQ_REGS];
+	u8 oldmask[AB5500_NUM_IRQ_REGS];
+	u8 startup_events[AB5500_NUM_IRQ_REGS];
+	bool startup_events_read;
+#ifdef CONFIG_DEBUG_FS
+	unsigned int debug_bank;
+	unsigned int debug_address;
+#endif
+};
+
+struct ab5500_platform_data {
+	struct {unsigned int base; unsigned int count; } irq;
+	void *dev_data[AB5500_NUM_DEVICES];
+	struct abx500_init_settings *init_settings;
+	unsigned int init_settings_sz;
+	bool pm_power_off;
+};
+
+#endif /* MFD_AB5500_H */
diff --git a/include/linux/mfd/abx500/ab8500-gpadc.h b/include/linux/mfd/abx500/ab8500-gpadc.h
new file mode 100644
index 000000000000..252966769d93
--- /dev/null
+++ b/include/linux/mfd/abx500/ab8500-gpadc.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2010 ST-Ericsson SA
+ * Licensed under GPLv2.
+ *
+ * Author: Arun R Murthy <arun.murthy@stericsson.com>
+ * Author: Daniel Willerud <daniel.willerud@stericsson.com>
+ */
+
+#ifndef	_AB8500_GPADC_H
+#define _AB8500_GPADC_H
+
+/* GPADC source: From datasheet(ADCSwSel[4:0] in GPADCCtrl2) */
+#define BAT_CTRL	0x01
+#define BTEMP_BALL	0x02
+#define MAIN_CHARGER_V	0x03
+#define ACC_DETECT1	0x04
+#define ACC_DETECT2	0x05
+#define ADC_AUX1	0x06
+#define ADC_AUX2	0x07
+#define MAIN_BAT_V	0x08
+#define VBUS_V		0x09
+#define MAIN_CHARGER_C	0x0A
+#define USB_CHARGER_C	0x0B
+#define BK_BAT_V	0x0C
+#define DIE_TEMP	0x0D
+
+struct ab8500_gpadc;
+
+struct ab8500_gpadc *ab8500_gpadc_get(char *name);
+int ab8500_gpadc_convert(struct ab8500_gpadc *gpadc, u8 channel);
+int ab8500_gpadc_read_raw(struct ab8500_gpadc *gpadc, u8 channel);
+int ab8500_gpadc_ad_to_voltage(struct ab8500_gpadc *gpadc,
+    u8 channel, int ad_value);
+
+#endif /* _AB8500_GPADC_H */
diff --git a/include/linux/mfd/abx500/ab8500-gpio.h b/include/linux/mfd/abx500/ab8500-gpio.h
new file mode 100644
index 000000000000..488a8c920a29
--- /dev/null
+++ b/include/linux/mfd/abx500/ab8500-gpio.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright ST-Ericsson 2010.
+ *
+ * Author: Bibek Basu <bibek.basu@stericsson.com>
+ * Licensed under GPLv2.
+ */
+
+#ifndef _AB8500_GPIO_H
+#define _AB8500_GPIO_H
+
+/*
+ * Platform data to register a block: only the initial gpio/irq number.
+ */
+
+struct ab8500_gpio_platform_data {
+	int gpio_base;
+	u32 irq_base;
+	u8  config_reg[7];
+};
+
+#endif /* _AB8500_GPIO_H */
diff --git a/include/linux/mfd/abx500/ab8500-sysctrl.h b/include/linux/mfd/abx500/ab8500-sysctrl.h
new file mode 100644
index 000000000000..10da0291f8f8
--- /dev/null
+++ b/include/linux/mfd/abx500/ab8500-sysctrl.h
@@ -0,0 +1,254 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2010
+ * Author: Mattias Nilsson <mattias.i.nilsson@stericsson.com> for ST Ericsson.
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#ifndef __AB8500_SYSCTRL_H
+#define __AB8500_SYSCTRL_H
+
+#include <linux/bitops.h>
+
+#ifdef CONFIG_AB8500_CORE
+
+int ab8500_sysctrl_read(u16 reg, u8 *value);
+int ab8500_sysctrl_write(u16 reg, u8 mask, u8 value);
+
+#else
+
+static inline int ab8500_sysctrl_read(u16 reg, u8 *value)
+{
+	return 0;
+}
+
+static inline int ab8500_sysctrl_write(u16 reg, u8 mask, u8 value)
+{
+	return 0;
+}
+
+#endif /* CONFIG_AB8500_CORE */
+
+static inline int ab8500_sysctrl_set(u16 reg, u8 bits)
+{
+	return ab8500_sysctrl_write(reg, bits, bits);
+}
+
+static inline int ab8500_sysctrl_clear(u16 reg, u8 bits)
+{
+	return ab8500_sysctrl_write(reg, bits, 0);
+}
+
+/* Registers */
+#define AB8500_TURNONSTATUS		0x100
+#define AB8500_RESETSTATUS		0x101
+#define AB8500_PONKEY1PRESSSTATUS	0x102
+#define AB8500_SYSCLKREQSTATUS		0x142
+#define AB8500_STW4500CTRL1		0x180
+#define AB8500_STW4500CTRL2		0x181
+#define AB8500_STW4500CTRL3		0x200
+#define AB8500_MAINWDOGCTRL		0x201
+#define AB8500_MAINWDOGTIMER		0x202
+#define AB8500_LOWBAT			0x203
+#define AB8500_BATTOK			0x204
+#define AB8500_SYSCLKTIMER		0x205
+#define AB8500_SMPSCLKCTRL		0x206
+#define AB8500_SMPSCLKSEL1		0x207
+#define AB8500_SMPSCLKSEL2		0x208
+#define AB8500_SMPSCLKSEL3		0x209
+#define AB8500_SYSULPCLKCONF		0x20A
+#define AB8500_SYSULPCLKCTRL1		0x20B
+#define AB8500_SYSCLKCTRL		0x20C
+#define AB8500_SYSCLKREQ1VALID		0x20D
+#define AB8500_SYSTEMCTRLSUP		0x20F
+#define AB8500_SYSCLKREQ1RFCLKBUF	0x210
+#define AB8500_SYSCLKREQ2RFCLKBUF	0x211
+#define AB8500_SYSCLKREQ3RFCLKBUF	0x212
+#define AB8500_SYSCLKREQ4RFCLKBUF	0x213
+#define AB8500_SYSCLKREQ5RFCLKBUF	0x214
+#define AB8500_SYSCLKREQ6RFCLKBUF	0x215
+#define AB8500_SYSCLKREQ7RFCLKBUF	0x216
+#define AB8500_SYSCLKREQ8RFCLKBUF	0x217
+#define AB8500_DITHERCLKCTRL		0x220
+#define AB8500_SWATCTRL			0x230
+#define AB8500_HIQCLKCTRL		0x232
+#define AB8500_VSIMSYSCLKCTRL		0x233
+
+/* Bits */
+#define AB8500_TURNONSTATUS_PORNVBAT BIT(0)
+#define AB8500_TURNONSTATUS_PONKEY1DBF BIT(1)
+#define AB8500_TURNONSTATUS_PONKEY2DBF BIT(2)
+#define AB8500_TURNONSTATUS_RTCALARM BIT(3)
+#define AB8500_TURNONSTATUS_MAINCHDET BIT(4)
+#define AB8500_TURNONSTATUS_VBUSDET BIT(5)
+#define AB8500_TURNONSTATUS_USBIDDETECT BIT(6)
+
+#define AB8500_RESETSTATUS_RESETN4500NSTATUS BIT(0)
+#define AB8500_RESETSTATUS_SWRESETN4500NSTATUS BIT(2)
+
+#define AB8500_PONKEY1PRESSSTATUS_PONKEY1PRESSTIME_MASK 0x7F
+#define AB8500_PONKEY1PRESSSTATUS_PONKEY1PRESSTIME_SHIFT 0
+
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ1STATUS BIT(0)
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ2STATUS BIT(1)
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ3STATUS BIT(2)
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ4STATUS BIT(3)
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ5STATUS BIT(4)
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ6STATUS BIT(5)
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ7STATUS BIT(6)
+#define AB8500_SYSCLKREQSTATUS_SYSCLKREQ8STATUS BIT(7)
+
+#define AB8500_STW4500CTRL1_SWOFF BIT(0)
+#define AB8500_STW4500CTRL1_SWRESET4500N BIT(1)
+#define AB8500_STW4500CTRL1_THDB8500SWOFF BIT(2)
+
+#define AB8500_STW4500CTRL2_RESETNVAUX1VALID BIT(0)
+#define AB8500_STW4500CTRL2_RESETNVAUX2VALID BIT(1)
+#define AB8500_STW4500CTRL2_RESETNVAUX3VALID BIT(2)
+#define AB8500_STW4500CTRL2_RESETNVMODVALID BIT(3)
+#define AB8500_STW4500CTRL2_RESETNVEXTSUPPLY1VALID BIT(4)
+#define AB8500_STW4500CTRL2_RESETNVEXTSUPPLY2VALID BIT(5)
+#define AB8500_STW4500CTRL2_RESETNVEXTSUPPLY3VALID BIT(6)
+#define AB8500_STW4500CTRL2_RESETNVSMPS1VALID BIT(7)
+
+#define AB8500_STW4500CTRL3_CLK32KOUT2DIS BIT(0)
+#define AB8500_STW4500CTRL3_RESETAUDN BIT(1)
+#define AB8500_STW4500CTRL3_RESETDENCN BIT(2)
+#define AB8500_STW4500CTRL3_THSDENA BIT(3)
+
+#define AB8500_MAINWDOGCTRL_MAINWDOGENA BIT(0)
+#define AB8500_MAINWDOGCTRL_MAINWDOGKICK BIT(1)
+#define AB8500_MAINWDOGCTRL_WDEXPTURNONVALID BIT(4)
+
+#define AB8500_MAINWDOGTIMER_MAINWDOGTIMER_MASK 0x7F
+#define AB8500_MAINWDOGTIMER_MAINWDOGTIMER_SHIFT 0
+
+#define AB8500_LOWBAT_LOWBATENA BIT(0)
+#define AB8500_LOWBAT_LOWBAT_MASK 0x7E
+#define AB8500_LOWBAT_LOWBAT_SHIFT 1
+
+#define AB8500_BATTOK_BATTOKSEL0THF_MASK 0x0F
+#define AB8500_BATTOK_BATTOKSEL0THF_SHIFT 0
+#define AB8500_BATTOK_BATTOKSEL1THF_MASK 0xF0
+#define AB8500_BATTOK_BATTOKSEL1THF_SHIFT 4
+
+#define AB8500_SYSCLKTIMER_SYSCLKTIMER_MASK 0x0F
+#define AB8500_SYSCLKTIMER_SYSCLKTIMER_SHIFT 0
+#define AB8500_SYSCLKTIMER_SYSCLKTIMERADJ_MASK 0xF0
+#define AB8500_SYSCLKTIMER_SYSCLKTIMERADJ_SHIFT 4
+
+#define AB8500_SMPSCLKCTRL_SMPSCLKINTSEL_MASK 0x03
+#define AB8500_SMPSCLKCTRL_SMPSCLKINTSEL_SHIFT 0
+#define AB8500_SMPSCLKCTRL_3M2CLKINTENA BIT(2)
+
+#define AB8500_SMPSCLKSEL1_VARMCLKSEL_MASK 0x07
+#define AB8500_SMPSCLKSEL1_VARMCLKSEL_SHIFT 0
+#define AB8500_SMPSCLKSEL1_VAPECLKSEL_MASK 0x38
+#define AB8500_SMPSCLKSEL1_VAPECLKSEL_SHIFT 3
+
+#define AB8500_SMPSCLKSEL2_VMODCLKSEL_MASK 0x07
+#define AB8500_SMPSCLKSEL2_VMODCLKSEL_SHIFT 0
+#define AB8500_SMPSCLKSEL2_VSMPS1CLKSEL_MASK 0x38
+#define AB8500_SMPSCLKSEL2_VSMPS1CLKSEL_SHIFT 3
+
+#define AB8500_SMPSCLKSEL3_VSMPS2CLKSEL_MASK 0x07
+#define AB8500_SMPSCLKSEL3_VSMPS2CLKSEL_SHIFT 0
+#define AB8500_SMPSCLKSEL3_VSMPS3CLKSEL_MASK 0x38
+#define AB8500_SMPSCLKSEL3_VSMPS3CLKSEL_SHIFT 3
+
+#define AB8500_SYSULPCLKCONF_ULPCLKCONF_MASK 0x03
+#define AB8500_SYSULPCLKCONF_ULPCLKCONF_SHIFT 0
+#define AB8500_SYSULPCLKCONF_CLK27MHZSTRE BIT(2)
+#define AB8500_SYSULPCLKCONF_TVOUTCLKDELN BIT(3)
+#define AB8500_SYSULPCLKCONF_TVOUTCLKINV BIT(4)
+#define AB8500_SYSULPCLKCONF_ULPCLKSTRE BIT(5)
+#define AB8500_SYSULPCLKCONF_CLK27MHZBUFENA BIT(6)
+#define AB8500_SYSULPCLKCONF_CLK27MHZPDENA BIT(7)
+
+#define AB8500_SYSULPCLKCTRL1_SYSULPCLKINTSEL_MASK 0x03
+#define AB8500_SYSULPCLKCTRL1_SYSULPCLKINTSEL_SHIFT 0
+#define AB8500_SYSULPCLKCTRL1_ULPCLKREQ BIT(2)
+#define AB8500_SYSULPCLKCTRL1_4500SYSCLKREQ BIT(3)
+#define AB8500_SYSULPCLKCTRL1_AUDIOCLKENA BIT(4)
+#define AB8500_SYSULPCLKCTRL1_SYSCLKBUF2REQ BIT(5)
+#define AB8500_SYSULPCLKCTRL1_SYSCLKBUF3REQ BIT(6)
+#define AB8500_SYSULPCLKCTRL1_SYSCLKBUF4REQ BIT(7)
+
+#define AB8500_SYSCLKCTRL_TVOUTPLLENA BIT(0)
+#define AB8500_SYSCLKCTRL_TVOUTCLKENA BIT(1)
+#define AB8500_SYSCLKCTRL_USBCLKENA BIT(2)
+
+#define AB8500_SYSCLKREQ1VALID_SYSCLKREQ1VALID BIT(0)
+#define AB8500_SYSCLKREQ1VALID_ULPCLKREQ1VALID BIT(1)
+#define AB8500_SYSCLKREQ1VALID_USBSYSCLKREQ1VALID BIT(2)
+
+#define AB8500_SYSTEMCTRLSUP_EXTSUP12LPNCLKSEL_MASK 0x03
+#define AB8500_SYSTEMCTRLSUP_EXTSUP12LPNCLKSEL_SHIFT 0
+#define AB8500_SYSTEMCTRLSUP_EXTSUP3LPNCLKSEL_MASK 0x0C
+#define AB8500_SYSTEMCTRLSUP_EXTSUP3LPNCLKSEL_SHIFT 2
+#define AB8500_SYSTEMCTRLSUP_INTDB8500NOD BIT(4)
+
+#define AB8500_SYSCLKREQ1RFCLKBUF_SYSCLKREQ1RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ1RFCLKBUF_SYSCLKREQ1RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ1RFCLKBUF_SYSCLKREQ1RFCLKBUF4 BIT(4)
+
+#define AB8500_SYSCLKREQ2RFCLKBUF_SYSCLKREQ2RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ2RFCLKBUF_SYSCLKREQ2RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ2RFCLKBUF_SYSCLKREQ2RFCLKBUF4 BIT(4)
+
+#define AB8500_SYSCLKREQ3RFCLKBUF_SYSCLKREQ3RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ3RFCLKBUF_SYSCLKREQ3RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ3RFCLKBUF_SYSCLKREQ3RFCLKBUF4 BIT(4)
+
+#define AB8500_SYSCLKREQ4RFCLKBUF_SYSCLKREQ4RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ4RFCLKBUF_SYSCLKREQ4RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ4RFCLKBUF_SYSCLKREQ4RFCLKBUF4 BIT(4)
+
+#define AB8500_SYSCLKREQ5RFCLKBUF_SYSCLKREQ5RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ5RFCLKBUF_SYSCLKREQ5RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ5RFCLKBUF_SYSCLKREQ5RFCLKBUF4 BIT(4)
+
+#define AB8500_SYSCLKREQ6RFCLKBUF_SYSCLKREQ6RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ6RFCLKBUF_SYSCLKREQ6RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ6RFCLKBUF_SYSCLKREQ6RFCLKBUF4 BIT(4)
+
+#define AB8500_SYSCLKREQ7RFCLKBUF_SYSCLKREQ7RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ7RFCLKBUF_SYSCLKREQ7RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ7RFCLKBUF_SYSCLKREQ7RFCLKBUF4 BIT(4)
+
+#define AB8500_SYSCLKREQ8RFCLKBUF_SYSCLKREQ8RFCLKBUF2 BIT(2)
+#define AB8500_SYSCLKREQ8RFCLKBUF_SYSCLKREQ8RFCLKBUF3 BIT(3)
+#define AB8500_SYSCLKREQ8RFCLKBUF_SYSCLKREQ8RFCLKBUF4 BIT(4)
+
+#define AB8500_DITHERCLKCTRL_VARMDITHERENA BIT(0)
+#define AB8500_DITHERCLKCTRL_VSMPS3DITHERENA BIT(1)
+#define AB8500_DITHERCLKCTRL_VSMPS1DITHERENA BIT(2)
+#define AB8500_DITHERCLKCTRL_VSMPS2DITHERENA BIT(3)
+#define AB8500_DITHERCLKCTRL_VMODDITHERENA BIT(4)
+#define AB8500_DITHERCLKCTRL_VAPEDITHERENA BIT(5)
+#define AB8500_DITHERCLKCTRL_DITHERDEL_MASK 0xC0
+#define AB8500_DITHERCLKCTRL_DITHERDEL_SHIFT 6
+
+#define AB8500_SWATCTRL_UPDATERF BIT(0)
+#define AB8500_SWATCTRL_SWATENABLE BIT(1)
+#define AB8500_SWATCTRL_RFOFFTIMER_MASK 0x1C
+#define AB8500_SWATCTRL_RFOFFTIMER_SHIFT 2
+#define AB8500_SWATCTRL_SWATBIT5 BIT(6)
+
+#define AB8500_HIQCLKCTRL_SYSCLKREQ1HIQENAVALID BIT(0)
+#define AB8500_HIQCLKCTRL_SYSCLKREQ2HIQENAVALID BIT(1)
+#define AB8500_HIQCLKCTRL_SYSCLKREQ3HIQENAVALID BIT(2)
+#define AB8500_HIQCLKCTRL_SYSCLKREQ4HIQENAVALID BIT(3)
+#define AB8500_HIQCLKCTRL_SYSCLKREQ5HIQENAVALID BIT(4)
+#define AB8500_HIQCLKCTRL_SYSCLKREQ6HIQENAVALID BIT(5)
+#define AB8500_HIQCLKCTRL_SYSCLKREQ7HIQENAVALID BIT(6)
+#define AB8500_HIQCLKCTRL_SYSCLKREQ8HIQENAVALID BIT(7)
+
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ1VALID BIT(0)
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ2VALID BIT(1)
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ3VALID BIT(2)
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ4VALID BIT(3)
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ5VALID BIT(4)
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ6VALID BIT(5)
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ7VALID BIT(6)
+#define AB8500_VSIMSYSCLKCTRL_VSIMSYSCLKREQ8VALID BIT(7)
+
+#endif /* __AB8500_SYSCTRL_H */
diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h
new file mode 100644
index 000000000000..838c6b487cc5
--- /dev/null
+++ b/include/linux/mfd/abx500/ab8500.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2010
+ *
+ * License Terms: GNU General Public License v2
+ * Author: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
+ */
+#ifndef MFD_AB8500_H
+#define MFD_AB8500_H
+
+#include <linux/device.h>
+
+/*
+ * AB8500 bank addresses
+ */
+#define AB8500_SYS_CTRL1_BLOCK	0x1
+#define AB8500_SYS_CTRL2_BLOCK	0x2
+#define AB8500_REGU_CTRL1	0x3
+#define AB8500_REGU_CTRL2	0x4
+#define AB8500_USB		0x5
+#define AB8500_TVOUT		0x6
+#define AB8500_DBI		0x7
+#define AB8500_ECI_AV_ACC	0x8
+#define AB8500_RESERVED		0x9
+#define AB8500_GPADC		0xA
+#define AB8500_CHARGER		0xB
+#define AB8500_GAS_GAUGE	0xC
+#define AB8500_AUDIO		0xD
+#define AB8500_INTERRUPT	0xE
+#define AB8500_RTC		0xF
+#define AB8500_MISC		0x10
+#define AB8500_DEVELOPMENT	0x11
+#define AB8500_DEBUG		0x12
+#define AB8500_PROD_TEST	0x13
+#define AB8500_OTP_EMUL		0x15
+
+/*
+ * Interrupts
+ */
+
+#define AB8500_INT_MAIN_EXT_CH_NOT_OK	0
+#define AB8500_INT_UN_PLUG_TV_DET	1
+#define AB8500_INT_PLUG_TV_DET		2
+#define AB8500_INT_TEMP_WARM		3
+#define AB8500_INT_PON_KEY2DB_F		4
+#define AB8500_INT_PON_KEY2DB_R		5
+#define AB8500_INT_PON_KEY1DB_F		6
+#define AB8500_INT_PON_KEY1DB_R		7
+#define AB8500_INT_BATT_OVV		8
+#define AB8500_INT_MAIN_CH_UNPLUG_DET	10
+#define AB8500_INT_MAIN_CH_PLUG_DET	11
+#define AB8500_INT_USB_ID_DET_F		12
+#define AB8500_INT_USB_ID_DET_R		13
+#define AB8500_INT_VBUS_DET_F		14
+#define AB8500_INT_VBUS_DET_R		15
+#define AB8500_INT_VBUS_CH_DROP_END	16
+#define AB8500_INT_RTC_60S		17
+#define AB8500_INT_RTC_ALARM		18
+#define AB8500_INT_BAT_CTRL_INDB	20
+#define AB8500_INT_CH_WD_EXP		21
+#define AB8500_INT_VBUS_OVV		22
+#define AB8500_INT_MAIN_CH_DROP_END	23
+#define AB8500_INT_CCN_CONV_ACC		24
+#define AB8500_INT_INT_AUD		25
+#define AB8500_INT_CCEOC		26
+#define AB8500_INT_CC_INT_CALIB		27
+#define AB8500_INT_LOW_BAT_F		28
+#define AB8500_INT_LOW_BAT_R		29
+#define AB8500_INT_BUP_CHG_NOT_OK	30
+#define AB8500_INT_BUP_CHG_OK		31
+#define AB8500_INT_GP_HW_ADC_CONV_END	32
+#define AB8500_INT_ACC_DETECT_1DB_F	33
+#define AB8500_INT_ACC_DETECT_1DB_R	34
+#define AB8500_INT_ACC_DETECT_22DB_F	35
+#define AB8500_INT_ACC_DETECT_22DB_R	36
+#define AB8500_INT_ACC_DETECT_21DB_F	37
+#define AB8500_INT_ACC_DETECT_21DB_R	38
+#define AB8500_INT_GP_SW_ADC_CONV_END	39
+#define AB8500_INT_GPIO6R		40
+#define AB8500_INT_GPIO7R		41
+#define AB8500_INT_GPIO8R		42
+#define AB8500_INT_GPIO9R		43
+#define AB8500_INT_GPIO10R		44
+#define AB8500_INT_GPIO11R		45
+#define AB8500_INT_GPIO12R		46
+#define AB8500_INT_GPIO13R		47
+#define AB8500_INT_GPIO24R		48
+#define AB8500_INT_GPIO25R		49
+#define AB8500_INT_GPIO36R		50
+#define AB8500_INT_GPIO37R		51
+#define AB8500_INT_GPIO38R		52
+#define AB8500_INT_GPIO39R		53
+#define AB8500_INT_GPIO40R		54
+#define AB8500_INT_GPIO41R		55
+#define AB8500_INT_GPIO6F		56
+#define AB8500_INT_GPIO7F		57
+#define AB8500_INT_GPIO8F		58
+#define AB8500_INT_GPIO9F		59
+#define AB8500_INT_GPIO10F		60
+#define AB8500_INT_GPIO11F		61
+#define AB8500_INT_GPIO12F		62
+#define AB8500_INT_GPIO13F		63
+#define AB8500_INT_GPIO24F		64
+#define AB8500_INT_GPIO25F		65
+#define AB8500_INT_GPIO36F		66
+#define AB8500_INT_GPIO37F		67
+#define AB8500_INT_GPIO38F		68
+#define AB8500_INT_GPIO39F		69
+#define AB8500_INT_GPIO40F		70
+#define AB8500_INT_GPIO41F		71
+#define AB8500_INT_ADP_SOURCE_ERROR	72
+#define AB8500_INT_ADP_SINK_ERROR	73
+#define AB8500_INT_ADP_PROBE_PLUG	74
+#define AB8500_INT_ADP_PROBE_UNPLUG	75
+#define AB8500_INT_ADP_SENSE_OFF	76
+#define AB8500_INT_USB_PHY_POWER_ERR	78
+#define AB8500_INT_USB_LINK_STATUS	79
+#define AB8500_INT_BTEMP_LOW		80
+#define AB8500_INT_BTEMP_LOW_MEDIUM	81
+#define AB8500_INT_BTEMP_MEDIUM_HIGH	82
+#define AB8500_INT_BTEMP_HIGH		83
+#define AB8500_INT_USB_CHARGER_NOT_OK	89
+#define AB8500_INT_ID_WAKEUP_R		90
+#define AB8500_INT_ID_DET_R1R		92
+#define AB8500_INT_ID_DET_R2R		93
+#define AB8500_INT_ID_DET_R3R		94
+#define AB8500_INT_ID_DET_R4R		95
+#define AB8500_INT_ID_WAKEUP_F		96
+#define AB8500_INT_ID_DET_R1F		98
+#define AB8500_INT_ID_DET_R2F		99
+#define AB8500_INT_ID_DET_R3F		100
+#define AB8500_INT_ID_DET_R4F		101
+#define AB8500_INT_USB_CHG_DET_DONE	102
+#define AB8500_INT_USB_CH_TH_PROT_F	104
+#define AB8500_INT_USB_CH_TH_PROT_R    105
+#define AB8500_INT_MAIN_CH_TH_PROT_F   106
+#define AB8500_INT_MAIN_CH_TH_PROT_R	107
+#define AB8500_INT_USB_CHARGER_NOT_OKF	111
+
+#define AB8500_NR_IRQS			112
+#define AB8500_NUM_IRQ_REGS		14
+
+/**
+ * struct ab8500 - ab8500 internal structure
+ * @dev: parent device
+ * @lock: read/write operations lock
+ * @irq_lock: genirq bus lock
+ * @irq: irq line
+ * @chip_id: chip revision id
+ * @write: register write
+ * @read: register read
+ * @rx_buf: rx buf for SPI
+ * @tx_buf: tx buf for SPI
+ * @mask: cache of IRQ regs for bus lock
+ * @oldmask: cache of previous IRQ regs for bus lock
+ */
+struct ab8500 {
+	struct device	*dev;
+	struct mutex	lock;
+	struct mutex	irq_lock;
+
+	int		irq_base;
+	int		irq;
+	u8		chip_id;
+
+	int (*write) (struct ab8500 *a8500, u16 addr, u8 data);
+	int (*read) (struct ab8500 *a8500, u16 addr);
+
+	unsigned long	tx_buf[4];
+	unsigned long	rx_buf[4];
+
+	u8 mask[AB8500_NUM_IRQ_REGS];
+	u8 oldmask[AB8500_NUM_IRQ_REGS];
+};
+
+struct ab8500_regulator_reg_init;
+struct regulator_init_data;
+struct ab8500_gpio_platform_data;
+
+/**
+ * struct ab8500_platform_data - AB8500 platform data
+ * @irq_base: start of AB8500 IRQs, AB8500_NR_IRQS will be used
+ * @init: board-specific initialization after detection of ab8500
+ * @num_regulator_reg_init: number of regulator init registers
+ * @regulator_reg_init: regulator init registers
+ * @num_regulator: number of regulators
+ * @regulator: machine-specific constraints for regulators
+ */
+struct ab8500_platform_data {
+	int irq_base;
+	void (*init) (struct ab8500 *);
+	int num_regulator_reg_init;
+	struct ab8500_regulator_reg_init *regulator_reg_init;
+	int num_regulator;
+	struct regulator_init_data *regulator;
+	struct ab8500_gpio_platform_data *gpio;	/* GPIO block platform data */
+};
+
+extern int __devinit ab8500_init(struct ab8500 *ab8500);
+extern int __devexit ab8500_exit(struct ab8500 *ab8500);
+
+#endif /* MFD_AB8500_H */
-- 
cgit v1.2.3


From 26cc3ab984cd00e95cb58ba5aaea4238ea56c700 Mon Sep 17 00:00:00 2001
From: Igor Grinberg <grinberg@compulab.co.il>
Date: Sun, 13 Nov 2011 11:49:50 +0200
Subject: mfd: Add power off functionality to TWL

The TWL family of PMICs, when used in master mode, provides power-off
functionality. The resulting power-off sequence shuts down all the SoC
supplies, LDOs, etc. The sequence is described in the "Power-Off Sequence"
chapter of the datasheets.
Note that the board must be wired correctly for the power-off to work as
expected.

Signed-off-by: Igor Grinberg <grinberg@compulab.co.il>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl4030-power.c | 42 ++++++++++++++++++++++++++++++++++++++++--
 include/linux/i2c/twl.h     |  2 ++
 2 files changed, 42 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index a764676f0922..d905f5171153 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -34,7 +34,8 @@
 static u8 twl4030_start_script_address = 0x2b;
 
 #define PWR_P1_SW_EVENTS	0x10
-#define PWR_DEVOFF	(1<<0)
+#define PWR_DEVOFF		(1 << 0)
+#define SEQ_OFFSYNC		(1 << 0)
 
 #define PHY_TO_OFF_PM_MASTER(p)		(p - 0x36)
 #define PHY_TO_OFF_PM_RECEIVER(p)	(p - 0x5b)
@@ -511,12 +512,27 @@ int twl4030_remove_script(u8 flags)
 	return err;
 }
 
+/*
+ * In master mode, start the power off sequence.
+ * After a successful execution, TWL shuts down the power to the SoC
+ * and all peripherals connected to it.
+ */
+void twl4030_power_off(void)
+{
+	int err;
+
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, PWR_DEVOFF,
+			       TWL4030_PM_MASTER_P1_SW_EVENTS);
+	if (err)
+		pr_err("TWL4030 Unable to power off\n");
+}
+
 void __init twl4030_power_init(struct twl4030_power_data *twl4030_scripts)
 {
 	int err = 0;
 	int i;
 	struct twl4030_resconfig *resconfig;
-	u8 address = twl4030_start_script_address;
+	u8 val, address = twl4030_start_script_address;
 
 	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
 			TWL4030_PM_MASTER_KEY_CFG1,
@@ -548,6 +564,28 @@ void __init twl4030_power_init(struct twl4030_power_data *twl4030_scripts)
 		}
 	}
 
+	/* Board has to be wired properly to use this feature */
+	if (twl4030_scripts->use_poweroff && !pm_power_off) {
+		/* SEQ_OFFSYNC is set by default; let's make sure it is */
+		err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &val,
+				      TWL4030_PM_MASTER_CFG_P123_TRANSITION);
+		if (err) {
+			pr_warning("TWL4030 Unable to read registers\n");
+
+		} else if (!(val & SEQ_OFFSYNC)) {
+			val |= SEQ_OFFSYNC;
+			err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, val,
+					TWL4030_PM_MASTER_CFG_P123_TRANSITION);
+			if (err) {
+				pr_err("TWL4030 Unable to setup SEQ_OFFSYNC\n");
+				goto relock;
+			}
+		}
+
+		pm_power_off = twl4030_power_off;
+	}
+
+relock:
 	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0,
 			TWL4030_PM_MASTER_PROTECT_KEY);
 	if (err)
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 114c0f6fc63d..78d3465251d6 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -652,10 +652,12 @@ struct twl4030_power_data {
 	unsigned num;
 	struct twl4030_resconfig *resource_config;
 #define TWL4030_RESCONFIG_UNDEF	((u8)-1)
+	bool use_poweroff;	/* Board is wired for TWL poweroff */
 };
 
 extern void twl4030_power_init(struct twl4030_power_data *triton2_scripts);
 extern int twl4030_remove_script(u8 flags);
+extern void twl4030_power_off(void);
 
 struct twl4030_codec_data {
 	unsigned int digimic_delay; /* in ms */
-- 
cgit v1.2.3


From f6dd2db940a1a0c6b9f7112109115c8243ba752b Mon Sep 17 00:00:00 2001
From: Donggeun Kim <dg77.kim@samsung.com>
Date: Wed, 14 Dec 2011 18:23:55 +0900
Subject: mfd: Add platform data and devices for MAX8997 LED control

The MAX8997 MFD driver does not yet support the LED control function of the device.
To enable the MAX8997-LED driver, the platform data and devices for LED are updated.

Signed-off-by: Donggeun Kim <dg77.kim@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/max8997.c       |  3 ++-
 include/linux/mfd/max8997.h | 25 ++++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max8997.c b/drivers/mfd/max8997.c
index 5be53ae9b61c..cb83a7ab53e7 100644
--- a/drivers/mfd/max8997.c
+++ b/drivers/mfd/max8997.c
@@ -43,7 +43,8 @@ static struct mfd_cell max8997_devs[] = {
 	{ .name = "max8997-battery", },
 	{ .name = "max8997-haptic", },
 	{ .name = "max8997-muic", },
-	{ .name = "max8997-flash", },
+	{ .name = "max8997-led", .id = 1 },
+	{ .name = "max8997-led", .id = 2 },
 };
 
 int max8997_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest)
diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index 49d2a0bfd7fe..fff590521e50 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h
@@ -131,6 +131,28 @@ struct max8997_muic_platform_data {
 	int num_init_data;
 };
 
+enum max8997_led_mode {
+	MAX8997_NONE,
+	MAX8997_FLASH_MODE,
+	MAX8997_MOVIE_MODE,
+	MAX8997_FLASH_PIN_CONTROL_MODE,
+	MAX8997_MOVIE_PIN_CONTROL_MODE,
+};
+
+/**
+ *  struct max8997_led_platform_data
+ *  The number of LED devices for MAX8997 is two
+ *  @mode: LED mode for each LED device
+ *  @brightness: initial brightness for each LED device
+ *	range:
+ *	[0 - 31]: MAX8997_FLASH_MODE and MAX8997_FLASH_PIN_CONTROL_MODE
+ *	[0 - 15]: MAX8997_MOVIE_MODE and MAX8997_MOVIE_PIN_CONTROL_MODE
+ */
+struct max8997_led_platform_data {
+	enum max8997_led_mode mode[2];
+	u8 brightness[2];
+};
+
 struct max8997_platform_data {
 	/* IRQ */
 	int irq_base;
@@ -172,7 +194,8 @@ struct max8997_platform_data {
 
 	/* HAPTIC: Not implemented */
 	/* RTC: Not implemented */
-	/* Flash: Not implemented */
+	/* ---- LED ---- */
+	struct max8997_led_platform_data *led_pdata;
 };
 
 #endif /* __LINUX_MFD_MAX8998_H */
-- 
cgit v1.2.3


From 1a6e4b7415339e3b11a87cff0d701b8a2e55f062 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@st.com>
Date: Thu, 17 Nov 2011 11:02:20 +0530
Subject: mfd: Separate out STMPE controller and interface specific code

A few STMPE controllers can have their register interface over SPI or I2C.
The current implementation only supports I2C, and all the code is present in
a single file, stmpe.c. It would be better to separate the I2C interface
specific code from the controller specific code. SPI specific code can then
be added later in a separate file.

This patch separates out I2C and controller specific code into separate files,
making stmpe.c independent of I2C.

Signed-off-by: Viresh Kumar <viresh.kumar@st.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig       |  11 ++++
 drivers/mfd/Makefile      |   1 +
 drivers/mfd/stmpe-i2c.c   | 107 +++++++++++++++++++++++++++++++++++++
 drivers/mfd/stmpe.c       | 133 +++++++++++++++-------------------------------
 drivers/mfd/stmpe.h       |  33 ++++++++++++
 include/linux/mfd/stmpe.h |   7 ++-
 6 files changed, 200 insertions(+), 92 deletions(-)
 create mode 100644 drivers/mfd/stmpe-i2c.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 08a3e087bcea..7bc55819ab4a 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -279,6 +279,17 @@ config MFD_STMPE
 		Keypad: stmpe-keypad
 		Touchscreen: stmpe-ts
 
+menu "STMPE Interface Drivers"
+depends on MFD_STMPE
+
+config STMPE_I2C
+	bool "STMPE I2C Interface"
+	depends on I2C
+	default y
+	help
+	  This is used to enable I2C interface of STMPE
+endmenu
+
 config MFD_TC3589X
 	bool "Support Toshiba TC35892 and variants"
 	depends on I2C=y && GENERIC_HARDIRQS
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index b2292eb75242..5eb90a70c1a5 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_MFD_DM355EVM_MSP)	+= dm355evm_msp.o
 obj-$(CONFIG_MFD_TI_SSP)	+= ti-ssp.o
 
 obj-$(CONFIG_MFD_STMPE)		+= stmpe.o
+obj-$(CONFIG_STMPE_I2C)		+= stmpe-i2c.o
 obj-$(CONFIG_MFD_TC3589X)	+= tc3589x.o
 obj-$(CONFIG_MFD_T7L66XB)	+= t7l66xb.o tmio_core.o
 obj-$(CONFIG_MFD_TC6387XB)	+= tc6387xb.o tmio_core.o
diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c
new file mode 100644
index 000000000000..0a4365902e36
--- /dev/null
+++ b/drivers/mfd/stmpe-i2c.c
@@ -0,0 +1,107 @@
+/*
+ * ST Microelectronics MFD: stmpe's i2c client specific driver
+ *
+ * Copyright (C) ST-Ericsson SA 2010
+ * Copyright (C) ST Microelectronics SA 2011
+ *
+ * License Terms: GNU General Public License, version 2
+ * Author: Rabin Vincent <rabin.vincent@stericsson.com> for ST-Ericsson
+ * Author: Viresh Kumar <viresh.kumar@st.com> for ST Microelectronics
+ */
+
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include "stmpe.h"
+
+static int i2c_reg_read(struct stmpe *stmpe, u8 reg)
+{
+	struct i2c_client *i2c = stmpe->client;
+
+	return i2c_smbus_read_byte_data(i2c, reg);
+}
+
+static int i2c_reg_write(struct stmpe *stmpe, u8 reg, u8 val)
+{
+	struct i2c_client *i2c = stmpe->client;
+
+	return i2c_smbus_write_byte_data(i2c, reg, val);
+}
+
+static int i2c_block_read(struct stmpe *stmpe, u8 reg, u8 length, u8 *values)
+{
+	struct i2c_client *i2c = stmpe->client;
+
+	return i2c_smbus_read_i2c_block_data(i2c, reg, length, values);
+}
+
+static int i2c_block_write(struct stmpe *stmpe, u8 reg, u8 length,
+		const u8 *values)
+{
+	struct i2c_client *i2c = stmpe->client;
+
+	return i2c_smbus_write_i2c_block_data(i2c, reg, length, values);
+}
+
+static struct stmpe_client_info i2c_ci = {
+	.read_byte = i2c_reg_read,
+	.write_byte = i2c_reg_write,
+	.read_block = i2c_block_read,
+	.write_block = i2c_block_write,
+};
+
+static int __devinit
+stmpe_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
+{
+	i2c_ci.data = (void *)id;
+	i2c_ci.irq = i2c->irq;
+	i2c_ci.client = i2c;
+	i2c_ci.dev = &i2c->dev;
+
+	return stmpe_probe(&i2c_ci, id->driver_data);
+}
+
+static int __devexit stmpe_i2c_remove(struct i2c_client *i2c)
+{
+	struct stmpe *stmpe = dev_get_drvdata(&i2c->dev);
+
+	return stmpe_remove(stmpe);
+}
+
+static const struct i2c_device_id stmpe_i2c_id[] = {
+	{ "stmpe811", STMPE811 },
+	{ "stmpe1601", STMPE1601 },
+	{ "stmpe2401", STMPE2401 },
+	{ "stmpe2403", STMPE2403 },
+	{ }	/* sentinel: terminates the id table */
+};
+MODULE_DEVICE_TABLE(i2c, stmpe_i2c_id);
+
+static struct i2c_driver stmpe_i2c_driver = {
+	.driver.name	= "stmpe-i2c",
+	.driver.owner	= THIS_MODULE,
+#ifdef CONFIG_PM
+	.driver.pm	= &stmpe_dev_pm_ops,
+#endif
+	.probe		= stmpe_i2c_probe,
+	.remove		= __devexit_p(stmpe_i2c_remove),
+	.id_table	= stmpe_i2c_id,
+};
+
+static int __init stmpe_init(void)
+{
+	return i2c_add_driver(&stmpe_i2c_driver);
+}
+subsys_initcall(stmpe_init);
+
+static void __exit stmpe_exit(void)
+{
+	i2c_del_driver(&stmpe_i2c_driver);
+}
+module_exit(stmpe_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("STMPE MFD I2C Interface Driver");
+MODULE_AUTHOR("Rabin Vincent <rabin.vincent@stericsson.com>");
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 39efa629a19d..83bacde6a7cb 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -1,4 +1,6 @@
 /*
+ * ST Microelectronics MFD: stmpe's driver
+ *
  * Copyright (C) ST-Ericsson SA 2010
  *
  * License Terms: GNU General Public License, version 2
@@ -7,13 +9,11 @@
 
 #include <linux/gpio.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/pm.h>
 #include <linux/slab.h>
-#include <linux/i2c.h>
 #include <linux/mfd/core.h>
-#include <linux/mfd/stmpe.h>
 #include "stmpe.h"
 
 static int __stmpe_enable(struct stmpe *stmpe, unsigned int blocks)
@@ -30,10 +30,9 @@ static int __stmpe_reg_read(struct stmpe *stmpe, u8 reg)
 {
 	int ret;
 
-	ret = i2c_smbus_read_byte_data(stmpe->i2c, reg);
+	ret = stmpe->ci->read_byte(stmpe, reg);
 	if (ret < 0)
-		dev_err(stmpe->dev, "failed to read reg %#x: %d\n",
-			reg, ret);
+		dev_err(stmpe->dev, "failed to read reg %#x: %d\n", reg, ret);
 
 	dev_vdbg(stmpe->dev, "rd: reg %#x => data %#x\n", reg, ret);
 
@@ -46,10 +45,9 @@ static int __stmpe_reg_write(struct stmpe *stmpe, u8 reg, u8 val)
 
 	dev_vdbg(stmpe->dev, "wr: reg %#x <= %#x\n", reg, val);
 
-	ret = i2c_smbus_write_byte_data(stmpe->i2c, reg, val);
+	ret = stmpe->ci->write_byte(stmpe, reg, val);
 	if (ret < 0)
-		dev_err(stmpe->dev, "failed to write reg %#x: %d\n",
-			reg, ret);
+		dev_err(stmpe->dev, "failed to write reg %#x: %d\n", reg, ret);
 
 	return ret;
 }
@@ -73,10 +71,9 @@ static int __stmpe_block_read(struct stmpe *stmpe, u8 reg, u8 length,
 {
 	int ret;
 
-	ret = i2c_smbus_read_i2c_block_data(stmpe->i2c, reg, length, values);
+	ret = stmpe->ci->read_block(stmpe, reg, length, values);
 	if (ret < 0)
-		dev_err(stmpe->dev, "failed to read regs %#x: %d\n",
-			reg, ret);
+		dev_err(stmpe->dev, "failed to read regs %#x: %d\n", reg, ret);
 
 	dev_vdbg(stmpe->dev, "rd: reg %#x (%d) => ret %#x\n", reg, length, ret);
 	stmpe_dump_bytes("stmpe rd: ", values, length);
@@ -92,11 +89,9 @@ static int __stmpe_block_write(struct stmpe *stmpe, u8 reg, u8 length,
 	dev_vdbg(stmpe->dev, "wr: regs %#x (%d)\n", reg, length);
 	stmpe_dump_bytes("stmpe wr: ", values, length);
 
-	ret = i2c_smbus_write_i2c_block_data(stmpe->i2c, reg, length,
-					     values);
+	ret = stmpe->ci->write_block(stmpe, reg, length, values);
 	if (ret < 0)
-		dev_err(stmpe->dev, "failed to write regs %#x: %d\n",
-			reg, ret);
+		dev_err(stmpe->dev, "failed to write regs %#x: %d\n", reg, ret);
 
 	return ret;
 }
@@ -874,34 +869,10 @@ static int __devinit stmpe_devices_init(struct stmpe *stmpe)
 	return ret;
 }
 
-#ifdef CONFIG_PM
-static int stmpe_suspend(struct device *dev)
-{
-	struct i2c_client *i2c = to_i2c_client(dev);
-	struct stmpe *stmpe = i2c_get_clientdata(i2c);
-
-	if (device_may_wakeup(&i2c->dev))
-		enable_irq_wake(stmpe->irq);
-
-	return 0;
-}
-
-static int stmpe_resume(struct device *dev)
+/* Called from client specific probe routines */
+int stmpe_probe(struct stmpe_client_info *ci, int partnum)
 {
-	struct i2c_client *i2c = to_i2c_client(dev);
-	struct stmpe *stmpe = i2c_get_clientdata(i2c);
-
-	if (device_may_wakeup(&i2c->dev))
-		disable_irq_wake(stmpe->irq);
-
-	return 0;
-}
-#endif
-
-static int __devinit stmpe_probe(struct i2c_client *i2c,
-				 const struct i2c_device_id *id)
-{
-	struct stmpe_platform_data *pdata = i2c->dev.platform_data;
+	struct stmpe_platform_data *pdata = dev_get_platdata(ci->dev);
 	struct stmpe *stmpe;
 	int ret;
 
@@ -915,18 +886,19 @@ static int __devinit stmpe_probe(struct i2c_client *i2c,
 	mutex_init(&stmpe->irq_lock);
 	mutex_init(&stmpe->lock);
 
-	stmpe->dev = &i2c->dev;
-	stmpe->i2c = i2c;
-
+	stmpe->dev = ci->dev;
+	stmpe->client = ci->client;
 	stmpe->pdata = pdata;
 	stmpe->irq_base = pdata->irq_base;
-
-	stmpe->partnum = id->driver_data;
-	stmpe->variant = stmpe_variant_info[stmpe->partnum];
+	stmpe->ci = ci;
+	stmpe->partnum = partnum;
+	stmpe->variant = stmpe_variant_info[partnum];
 	stmpe->regs = stmpe->variant->regs;
 	stmpe->num_gpios = stmpe->variant->num_gpios;
+	dev_set_drvdata(stmpe->dev, stmpe);
 
-	i2c_set_clientdata(i2c, stmpe);
+	if (ci->init)
+		ci->init(stmpe);
 
 	if (pdata->irq_over_gpio) {
 		ret = gpio_request_one(pdata->irq_gpio, GPIOF_DIR_IN, "stmpe");
@@ -938,7 +910,7 @@ static int __devinit stmpe_probe(struct i2c_client *i2c,
 
 		stmpe->irq = gpio_to_irq(pdata->irq_gpio);
 	} else {
-		stmpe->irq = i2c->irq;
+		stmpe->irq = ci->irq;
 	}
 
 	ret = stmpe_chip_init(stmpe);
@@ -950,8 +922,7 @@ static int __devinit stmpe_probe(struct i2c_client *i2c,
 		goto free_gpio;
 
 	ret = request_threaded_irq(stmpe->irq, NULL, stmpe_irq,
-				   pdata->irq_trigger | IRQF_ONESHOT,
-				   "stmpe", stmpe);
+			pdata->irq_trigger | IRQF_ONESHOT, "stmpe", stmpe);
 	if (ret) {
 		dev_err(stmpe->dev, "failed to request IRQ: %d\n", ret);
 		goto out_removeirq;
@@ -978,10 +949,8 @@ out_free:
 	return ret;
 }
 
-static int __devexit stmpe_remove(struct i2c_client *client)
+int stmpe_remove(struct stmpe *stmpe)
 {
-	struct stmpe *stmpe = i2c_get_clientdata(client);
-
 	mfd_remove_devices(stmpe->dev);
 
 	free_irq(stmpe->irq, stmpe);
@@ -995,45 +964,29 @@ static int __devexit stmpe_remove(struct i2c_client *client)
 	return 0;
 }
 
-static const struct i2c_device_id stmpe_id[] = {
-	{ "stmpe811", STMPE811 },
-	{ "stmpe1601", STMPE1601 },
-	{ "stmpe2401", STMPE2401 },
-	{ "stmpe2403", STMPE2403 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, stmpe_id);
-
 #ifdef CONFIG_PM
-static const struct dev_pm_ops stmpe_dev_pm_ops = {
-	.suspend	= stmpe_suspend,
-	.resume		= stmpe_resume,
-};
-#endif
+static int stmpe_suspend(struct device *dev)
+{
+	struct stmpe *stmpe = dev_get_drvdata(dev);
 
-static struct i2c_driver stmpe_driver = {
-	.driver.name	= "stmpe",
-	.driver.owner	= THIS_MODULE,
-#ifdef CONFIG_PM
-	.driver.pm	= &stmpe_dev_pm_ops,
-#endif
-	.probe		= stmpe_probe,
-	.remove		= __devexit_p(stmpe_remove),
-	.id_table	= stmpe_id,
-};
+	if (device_may_wakeup(dev))
+		enable_irq_wake(stmpe->irq);
 
-static int __init stmpe_init(void)
-{
-	return i2c_add_driver(&stmpe_driver);
+	return 0;
 }
-subsys_initcall(stmpe_init);
 
-static void __exit stmpe_exit(void)
+static int stmpe_resume(struct device *dev)
 {
-	i2c_del_driver(&stmpe_driver);
+	struct stmpe *stmpe = dev_get_drvdata(dev);
+
+	if (device_may_wakeup(dev))
+		disable_irq_wake(stmpe->irq);
+
+	return 0;
 }
-module_exit(stmpe_exit);
 
-MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("STMPE MFD core driver");
-MODULE_AUTHOR("Rabin Vincent <rabin.vincent@stericsson.com>");
+const struct dev_pm_ops stmpe_dev_pm_ops = {
+	.suspend	= stmpe_suspend,
+	.resume		= stmpe_resume,
+};
+#endif
diff --git a/drivers/mfd/stmpe.h b/drivers/mfd/stmpe.h
index e4ee38956583..18d89a68ce40 100644
--- a/drivers/mfd/stmpe.h
+++ b/drivers/mfd/stmpe.h
@@ -8,6 +8,14 @@
 #ifndef __STMPE_H
 #define __STMPE_H
 
+#include <linux/device.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/stmpe.h>
+#include <linux/printk.h>
+#include <linux/types.h>
+
+extern const struct dev_pm_ops stmpe_dev_pm_ops;
+
 #ifdef STMPE_DUMP_BYTES
 static inline void stmpe_dump_bytes(const char *str, const void *buf,
 				    size_t len)
@@ -67,6 +75,34 @@ struct stmpe_variant_info {
 	int (*enable_autosleep)(struct stmpe *stmpe, int autosleep_timeout);
 };
 
+/**
+ * struct stmpe_client_info - i2c or spi specific routines/info
+ * @data: client specific data
+ * @irq: client IRQ number, used when the IRQ is not routed over a GPIO
+ * @client: bus specific client handle - i2c or spi
+ * @dev: client device; the core reads its platform data and sets its drvdata
+ * @read_byte: read single byte
+ * @write_byte: write single byte
+ * @read_block: read block or multiple bytes
+ * @write_block: write block or multiple bytes
+ * @init: client init routine, called during probe
+ */
+struct stmpe_client_info {
+	void *data;
+	int irq;
+	void *client;
+	struct device *dev;
+	int (*read_byte)(struct stmpe *stmpe, u8 reg);
+	int (*write_byte)(struct stmpe *stmpe, u8 reg, u8 val);
+	int (*read_block)(struct stmpe *stmpe, u8 reg, u8 len, u8 *values);
+	int (*write_block)(struct stmpe *stmpe, u8 reg, u8 len,
+			const u8 *values);
+	void (*init)(struct stmpe *stmpe);
+};
+
+int stmpe_probe(struct stmpe_client_info *ci, int partnum);
+int stmpe_remove(struct stmpe *stmpe);
+
 #define STMPE_ICR_LSB_HIGH	(1 << 2)
 #define STMPE_ICR_LSB_EDGE	(1 << 1)
 #define STMPE_ICR_LSB_GIM	(1 << 0)
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index 270d6613aadf..babc6b2857d3 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -50,13 +50,15 @@ enum {
 
 
 struct stmpe_variant_info;
+struct stmpe_client_info;
 
 /**
  * struct stmpe - STMPE MFD structure
  * @lock: lock protecting I/O operations
  * @irq_lock: IRQ bus lock
  * @dev: device, mostly for dev_dbg()
- * @i2c: i2c client
+ * @client: client - i2c or spi
+ * @ci: client specific information
  * @partnum: part number
  * @variant: the detected STMPE model number
  * @regs: list of addresses of registers which are at different addresses on
@@ -72,7 +74,8 @@ struct stmpe {
 	struct mutex lock;
 	struct mutex irq_lock;
 	struct device *dev;
-	struct i2c_client *i2c;
+	void *client;
+	struct stmpe_client_info *ci;
 	enum stmpe_partnum partnum;
 	struct stmpe_variant_info *variant;
 	const u8 *regs;
-- 
cgit v1.2.3


From 1cda2394e95415f1469ab8eaffd081395e112551 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@st.com>
Date: Thu, 17 Nov 2011 11:02:22 +0530
Subject: mfd: Add support for stmpe variant 610

STMPE610 is very much like STMPE811, except the number of gpio pins, which is 8
in 811 and 6 in 610. This patch adds support for variant 610. STMPE610 will
share most of the code with STMPE811.

Signed-off-by: Viresh Kumar <viresh.kumar@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/stmpe-i2c.c   |  1 +
 drivers/mfd/stmpe-spi.c   |  1 +
 drivers/mfd/stmpe.c       | 20 ++++++++++++++++++--
 include/linux/mfd/stmpe.h |  1 +
 4 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c
index 0a4365902e36..b11d33b1c892 100644
--- a/drivers/mfd/stmpe-i2c.c
+++ b/drivers/mfd/stmpe-i2c.c
@@ -71,6 +71,7 @@ static int __devexit stmpe_i2c_remove(struct i2c_client *i2c)
 }
 
 static const struct i2c_device_id stmpe_i2c_id[] = {
+	{ "stmpe610", STMPE610 },
 	{ "stmpe811", STMPE811 },
 	{ "stmpe1601", STMPE1601 },
 	{ "stmpe2401", STMPE2401 },
diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c
index 53efce4fe294..46963a5d569f 100644
--- a/drivers/mfd/stmpe-spi.c
+++ b/drivers/mfd/stmpe-spi.c
@@ -109,6 +109,7 @@ static int __devexit stmpe_spi_remove(struct spi_device *spi)
 }
 
 static const struct spi_device_id stmpe_spi_id[] = {
+	{ "stmpe610", STMPE610 },
 	{ "stmpe811", STMPE811 },
 	{ "stmpe1601", STMPE1601 },
 	{ "stmpe2401", STMPE2401 },
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 83bacde6a7cb..67ff3dc5bb45 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -321,7 +321,7 @@ static struct mfd_cell stmpe_keypad_cell = {
 };
 
 /*
- * Touchscreen (STMPE811)
+ * Touchscreen (STMPE811 or STMPE610)
  */
 
 static struct resource stmpe_ts_resources[] = {
@@ -346,7 +346,7 @@ static struct mfd_cell stmpe_ts_cell = {
 };
 
 /*
- * STMPE811
+ * STMPE811 or STMPE610
  */
 
 static const u8 stmpe811_regs[] = {
@@ -417,6 +417,21 @@ static struct stmpe_variant_info stmpe811 = {
 	.get_altfunc	= stmpe811_get_altfunc,
 };
 
+/* Similar to 811, except number of gpios */
+static struct stmpe_variant_info stmpe610 = {
+	.name		= "stmpe610",
+	.id_val		= 0x0811,
+	.id_mask	= 0xffff,
+	.num_gpios	= 6,
+	.af_bits	= 1,
+	.regs		= stmpe811_regs,
+	.blocks		= stmpe811_blocks,
+	.num_blocks	= ARRAY_SIZE(stmpe811_blocks),
+	.num_irqs	= STMPE811_NR_INTERNAL_IRQS,
+	.enable		= stmpe811_enable,
+	.get_altfunc	= stmpe811_get_altfunc,
+};
+
 /*
  * STMPE1601
  */
@@ -651,6 +666,7 @@ static struct stmpe_variant_info stmpe2403 = {
 };
 
 static struct stmpe_variant_info *stmpe_variant_info[] = {
+	[STMPE610]	= &stmpe610,
 	[STMPE811]	= &stmpe811,
 	[STMPE1601]	= &stmpe1601,
 	[STMPE2401]	= &stmpe2401,
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index babc6b2857d3..342005afd347 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -20,6 +20,7 @@ enum stmpe_block {
 };
 
 enum stmpe_partnum {
+	STMPE610,
 	STMPE811,
 	STMPE1601,
 	STMPE2401,
-- 
cgit v1.2.3


From 7f7f4ea15ef4645f3888310a7a761fc2c4f689c9 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@st.com>
Date: Thu, 17 Nov 2011 11:02:23 +0530
Subject: mfd: Add support for stmpe variant 801

STMPE801 is a GPIO expander. Registers for 801 are much different from other
variants. This patch adds support for STMPE801 in stmpe mfd driver.

Signed-off-by: Bhupesh Sharma <bhupesh.sharma@st.com>
Signed-off-by: Pratyush Anand <pratyush.anand@st.com>
Signed-off-by: Viresh Kumar <viresh.kumar@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/stmpe-i2c.c   |  1 +
 drivers/mfd/stmpe-spi.c   |  1 +
 drivers/mfd/stmpe.c       | 97 ++++++++++++++++++++++++++++++++++++++++-------
 drivers/mfd/stmpe.h       | 19 ++++++++++
 include/linux/mfd/stmpe.h |  1 +
 5 files changed, 106 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c
index b11d33b1c892..373f423b1181 100644
--- a/drivers/mfd/stmpe-i2c.c
+++ b/drivers/mfd/stmpe-i2c.c
@@ -72,6 +72,7 @@ static int __devexit stmpe_i2c_remove(struct i2c_client *i2c)
 
 static const struct i2c_device_id stmpe_i2c_id[] = {
 	{ "stmpe610", STMPE610 },
+	{ "stmpe801", STMPE801 },
 	{ "stmpe811", STMPE811 },
 	{ "stmpe1601", STMPE1601 },
 	{ "stmpe2401", STMPE2401 },
diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c
index 46963a5d569f..b58c43c7ea93 100644
--- a/drivers/mfd/stmpe-spi.c
+++ b/drivers/mfd/stmpe-spi.c
@@ -110,6 +110,7 @@ static int __devexit stmpe_spi_remove(struct spi_device *spi)
 
 static const struct spi_device_id stmpe_spi_id[] = {
 	{ "stmpe610", STMPE610 },
+	{ "stmpe801", STMPE801 },
 	{ "stmpe811", STMPE811 },
 	{ "stmpe1601", STMPE1601 },
 	{ "stmpe2401", STMPE2401 },
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 67ff3dc5bb45..fc2c6afb31e1 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -241,12 +241,14 @@ int stmpe_set_altfunc(struct stmpe *stmpe, u32 pins, enum stmpe_block block)
 	u8 regaddr = stmpe->regs[STMPE_IDX_GPAFR_U_MSB];
 	int af_bits = variant->af_bits;
 	int numregs = DIV_ROUND_UP(stmpe->num_gpios * af_bits, 8);
-	int afperreg = 8 / af_bits;
 	int mask = (1 << af_bits) - 1;
 	u8 regs[numregs];
-	int af;
-	int ret;
+	int af, afperreg, ret;
+
+	if (!variant->get_altfunc)
+		return 0;
 
+	afperreg = 8 / af_bits;
 	mutex_lock(&stmpe->lock);
 
 	ret = __stmpe_enable(stmpe, STMPE_BLOCK_GPIO);
@@ -320,6 +322,50 @@ static struct mfd_cell stmpe_keypad_cell = {
 	.num_resources	= ARRAY_SIZE(stmpe_keypad_resources),
 };
 
+/*
+ * STMPE801
+ */
+static const u8 stmpe801_regs[] = {
+	[STMPE_IDX_CHIP_ID]	= STMPE801_REG_CHIP_ID,
+	[STMPE_IDX_ICR_LSB]	= STMPE801_REG_SYS_CTRL,
+	[STMPE_IDX_GPMR_LSB]	= STMPE801_REG_GPIO_MP_STA,
+	[STMPE_IDX_GPSR_LSB]	= STMPE801_REG_GPIO_SET_PIN,
+	[STMPE_IDX_GPCR_LSB]	= STMPE801_REG_GPIO_SET_PIN,
+	[STMPE_IDX_GPDR_LSB]	= STMPE801_REG_GPIO_DIR,
+	[STMPE_IDX_IEGPIOR_LSB] = STMPE801_REG_GPIO_INT_EN,
+	[STMPE_IDX_ISGPIOR_MSB] = STMPE801_REG_GPIO_INT_STA,
+
+};
+
+static struct stmpe_variant_block stmpe801_blocks[] = {
+	{
+		.cell	= &stmpe_gpio_cell,
+		.irq	= 0,
+		.block	= STMPE_BLOCK_GPIO,
+	},
+};
+
+static int stmpe801_enable(struct stmpe *stmpe, unsigned int blocks,
+			   bool enable)
+{
+	if (blocks & STMPE_BLOCK_GPIO)
+		return 0;
+	else
+		return -EINVAL;
+}
+
+static struct stmpe_variant_info stmpe801 = {
+	.name		= "stmpe801",
+	.id_val		= STMPE801_ID,
+	.id_mask	= 0xffff,
+	.num_gpios	= 8,
+	.regs		= stmpe801_regs,
+	.blocks		= stmpe801_blocks,
+	.num_blocks	= ARRAY_SIZE(stmpe801_blocks),
+	.num_irqs	= STMPE801_NR_INTERNAL_IRQS,
+	.enable		= stmpe801_enable,
+};
+
 /*
  * Touchscreen (STMPE811 or STMPE610)
  */
@@ -667,6 +713,7 @@ static struct stmpe_variant_info stmpe2403 = {
 
 static struct stmpe_variant_info *stmpe_variant_info[] = {
 	[STMPE610]	= &stmpe610,
+	[STMPE801]	= &stmpe801,
 	[STMPE811]	= &stmpe811,
 	[STMPE1601]	= &stmpe1601,
 	[STMPE2401]	= &stmpe2401,
@@ -683,6 +730,11 @@ static irqreturn_t stmpe_irq(int irq, void *data)
 	int ret;
 	int i;
 
+	if (variant->id_val == STMPE801_ID) {
+		handle_nested_irq(stmpe->irq_base);
+		return IRQ_HANDLED;
+	}
+
 	ret = stmpe_block_read(stmpe, israddr, num, isr);
 	if (ret < 0)
 		return IRQ_NONE;
@@ -769,14 +821,17 @@ static struct irq_chip stmpe_irq_chip = {
 
 static int __devinit stmpe_irq_init(struct stmpe *stmpe)
 {
+	struct irq_chip *chip = NULL;
 	int num_irqs = stmpe->variant->num_irqs;
 	int base = stmpe->irq_base;
 	int irq;
 
+	if (stmpe->variant->id_val != STMPE801_ID)
+		chip = &stmpe_irq_chip;
+
 	for (irq = base; irq < base + num_irqs; irq++) {
 		irq_set_chip_data(irq, stmpe);
-		irq_set_chip_and_handler(irq, &stmpe_irq_chip,
-					 handle_edge_irq);
+		irq_set_chip_and_handler(irq, chip, handle_edge_irq);
 		irq_set_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, IRQF_VALID);
@@ -808,7 +863,7 @@ static int __devinit stmpe_chip_init(struct stmpe *stmpe)
 	unsigned int irq_trigger = stmpe->pdata->irq_trigger;
 	int autosleep_timeout = stmpe->pdata->autosleep_timeout;
 	struct stmpe_variant_info *variant = stmpe->variant;
-	u8 icr = STMPE_ICR_LSB_GIM;
+	u8 icr;
 	unsigned int id;
 	u8 data[2];
 	int ret;
@@ -831,16 +886,32 @@ static int __devinit stmpe_chip_init(struct stmpe *stmpe)
 	if (ret)
 		return ret;
 
-	if (irq_trigger == IRQF_TRIGGER_FALLING ||
-	    irq_trigger == IRQF_TRIGGER_RISING)
-		icr |= STMPE_ICR_LSB_EDGE;
+	if (id == STMPE801_ID)
+		icr = STMPE801_REG_SYS_CTRL_INT_EN;
+	else
+		icr = STMPE_ICR_LSB_GIM;
+
+	/* STMPE801 doesn't support Edge interrupts */
+	if (id != STMPE801_ID) {
+		if (irq_trigger == IRQF_TRIGGER_FALLING ||
+				irq_trigger == IRQF_TRIGGER_RISING)
+			icr |= STMPE_ICR_LSB_EDGE;
+	}
 
 	if (irq_trigger == IRQF_TRIGGER_RISING ||
-	    irq_trigger == IRQF_TRIGGER_HIGH)
-		icr |= STMPE_ICR_LSB_HIGH;
+			irq_trigger == IRQF_TRIGGER_HIGH) {
+		if (id == STMPE801_ID)
+			icr |= STMPE801_REG_SYS_CTRL_INT_HI;
+		else
+			icr |= STMPE_ICR_LSB_HIGH;
+	}
 
-	if (stmpe->pdata->irq_invert_polarity)
-		icr ^= STMPE_ICR_LSB_HIGH;
+	if (stmpe->pdata->irq_invert_polarity) {
+		if (id == STMPE801_ID)
+			icr ^= STMPE801_REG_SYS_CTRL_INT_HI;
+		else
+			icr ^= STMPE_ICR_LSB_HIGH;
+	}
 
 	if (stmpe->pdata->autosleep) {
 		ret = stmpe_autosleep(stmpe, autosleep_timeout);
diff --git a/drivers/mfd/stmpe.h b/drivers/mfd/stmpe.h
index a73f4c1085f2..7b8e13f5b764 100644
--- a/drivers/mfd/stmpe.h
+++ b/drivers/mfd/stmpe.h
@@ -104,6 +104,25 @@ int stmpe_remove(struct stmpe *stmpe);
 #define STMPE_ICR_LSB_EDGE	(1 << 1)
 #define STMPE_ICR_LSB_GIM	(1 << 0)
 
+/*
+ * STMPE801
+ */
+#define STMPE801_ID			0x0108
+#define STMPE801_NR_INTERNAL_IRQS	1
+
+#define STMPE801_REG_CHIP_ID		0x00
+#define STMPE801_REG_VERSION_ID		0x02
+#define STMPE801_REG_SYS_CTRL		0x04
+#define STMPE801_REG_GPIO_INT_EN	0x08
+#define STMPE801_REG_GPIO_INT_STA	0x09
+#define STMPE801_REG_GPIO_MP_STA	0x10
+#define STMPE801_REG_GPIO_SET_PIN	0x11
+#define STMPE801_REG_GPIO_DIR		0x12
+
+#define STMPE801_REG_SYS_CTRL_RESET	(1 << 7)
+#define STMPE801_REG_SYS_CTRL_INT_EN	(1 << 2)
+#define STMPE801_REG_SYS_CTRL_INT_HI	(1 << 0)
+
 /*
  * STMPE811
  */
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index 342005afd347..ca1d7a347600 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -21,6 +21,7 @@ enum stmpe_block {
 
 enum stmpe_partnum {
 	STMPE610,
+	STMPE801,
 	STMPE811,
 	STMPE1601,
 	STMPE2401,
-- 
cgit v1.2.3


From 0f5f70783eddde2bd277ae521fa04226cb1e249d Mon Sep 17 00:00:00 2001
From: Sangbeom Kim <sbkim73@samsung.com>
Date: Fri, 23 Dec 2011 17:28:08 +0900
Subject: mfd: Add S5M core driver

The S5M series are PMICs that include multiple functional devices.
They can support PMIC, RTC, battery charger and codec functions.
This patch implements the core driver for the S5M series.

Signed-off-by: Sangbeom Kim <sbkim73@samsung.com>
Reviewed-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/s5m-core.c               | 176 +++++++++++++++++
 include/linux/mfd/s5m87xx/s5m-core.h | 373 +++++++++++++++++++++++++++++++++++
 include/linux/mfd/s5m87xx/s5m-pmic.h | 100 ++++++++++
 include/linux/mfd/s5m87xx/s5m-rtc.h  |  84 ++++++++
 4 files changed, 733 insertions(+)
 create mode 100644 drivers/mfd/s5m-core.c
 create mode 100644 include/linux/mfd/s5m87xx/s5m-core.h
 create mode 100644 include/linux/mfd/s5m87xx/s5m-pmic.h
 create mode 100644 include/linux/mfd/s5m87xx/s5m-rtc.h

(limited to 'include/linux')

diff --git a/drivers/mfd/s5m-core.c b/drivers/mfd/s5m-core.c
new file mode 100644
index 000000000000..e075c113eec6
--- /dev/null
+++ b/drivers/mfd/s5m-core.c
@@ -0,0 +1,176 @@
+/*
+ * s5m-core.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd
+ *              http://www.samsung.com
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
+#include <linux/mutex.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/s5m87xx/s5m-core.h>
+#include <linux/mfd/s5m87xx/s5m-pmic.h>
+#include <linux/mfd/s5m87xx/s5m-rtc.h>
+#include <linux/regmap.h>
+
+static struct mfd_cell s5m87xx_devs[] = {
+	{
+		.name = "s5m8767-pmic",
+	}, {
+		.name = "s5m-rtc",
+	},
+};
+
+int s5m_reg_read(struct s5m87xx_dev *s5m87xx, u8 reg, void *dest)
+{
+	return regmap_read(s5m87xx->regmap, reg, dest);
+}
+EXPORT_SYMBOL_GPL(s5m_reg_read);
+
+int s5m_bulk_read(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf)
+{
+	return regmap_bulk_read(s5m87xx->regmap, reg, buf, count);
+}
+EXPORT_SYMBOL_GPL(s5m_bulk_read);
+
+int s5m_reg_write(struct s5m87xx_dev *s5m87xx, u8 reg, u8 value)
+{
+	return regmap_write(s5m87xx->regmap, reg, value);
+}
+EXPORT_SYMBOL_GPL(s5m_reg_write);
+
+int s5m_bulk_write(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf)
+{
+	return regmap_raw_write(s5m87xx->regmap, reg, buf, count); /* bytes */
+}
+EXPORT_SYMBOL_GPL(s5m_bulk_write);
+
+int s5m_reg_update(struct s5m87xx_dev *s5m87xx, u8 reg, u8 val, u8 mask)
+{
+	return regmap_update_bits(s5m87xx->regmap, reg, mask, val);
+}
+EXPORT_SYMBOL_GPL(s5m_reg_update);
+
+static struct regmap_config s5m_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
+
+/* Probe: set up regmap, the RTC dummy client, IRQs and the MFD sub-devices. */
+static int s5m87xx_i2c_probe(struct i2c_client *i2c,
+			    const struct i2c_device_id *id)
+{
+	struct s5m_platform_data *pdata = i2c->dev.platform_data;
+	struct s5m87xx_dev *s5m87xx;
+	int ret;
+
+	s5m87xx = kzalloc(sizeof(struct s5m87xx_dev), GFP_KERNEL);
+	if (s5m87xx == NULL)
+		return -ENOMEM;
+
+	i2c_set_clientdata(i2c, s5m87xx);
+	s5m87xx->dev = &i2c->dev;
+	s5m87xx->i2c = i2c;
+	s5m87xx->irq = i2c->irq;
+	s5m87xx->type = id->driver_data;
+
+	if (pdata) {
+		s5m87xx->device_type = pdata->device_type;
+		s5m87xx->ono = pdata->ono;
+		s5m87xx->irq_base = pdata->irq_base;
+		s5m87xx->wakeup = pdata->wakeup;
+	}
+
+	s5m87xx->regmap = regmap_init_i2c(i2c, &s5m_regmap_config);
+	if (IS_ERR(s5m87xx->regmap)) {
+		ret = PTR_ERR(s5m87xx->regmap);
+		dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
+			ret);
+		goto err_free;
+	}
+
+	s5m87xx->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR);
+	if (!s5m87xx->rtc) {
+		ret = -ENODEV;
+		goto err_regmap;
+	}
+	i2c_set_clientdata(s5m87xx->rtc, s5m87xx);
+
+	if (pdata && pdata->cfg_pmic_irq)
+		pdata->cfg_pmic_irq();
+	s5m_irq_init(s5m87xx);
+	pm_runtime_set_active(s5m87xx->dev);
+
+	ret = mfd_add_devices(s5m87xx->dev, -1, s5m87xx_devs,
+			      ARRAY_SIZE(s5m87xx_devs), NULL, 0);
+	if (ret >= 0)
+		return ret;
+
+	/* MFD registration failed: unwind in reverse order of setup */
+	mfd_remove_devices(s5m87xx->dev);
+	s5m_irq_exit(s5m87xx);
+	i2c_unregister_device(s5m87xx->rtc);
+err_regmap:
+	regmap_exit(s5m87xx->regmap);
+err_free:
+	kfree(s5m87xx);
+	return ret;
+}
+
+static int s5m87xx_i2c_remove(struct i2c_client *i2c)
+{
+	struct s5m87xx_dev *s5m87xx = i2c_get_clientdata(i2c);
+
+	mfd_remove_devices(s5m87xx->dev);
+	s5m_irq_exit(s5m87xx);
+	i2c_unregister_device(s5m87xx->rtc);
+	regmap_exit(s5m87xx->regmap);
+	kfree(s5m87xx);
+	return 0;
+}
+
+static const struct i2c_device_id s5m87xx_i2c_id[] = {
+	{ "s5m87xx", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, s5m87xx_i2c_id);
+
+static struct i2c_driver s5m87xx_i2c_driver = {
+	.driver = {
+		   .name = "s5m87xx",
+		   .owner = THIS_MODULE,
+	},
+	.probe = s5m87xx_i2c_probe,
+	.remove = s5m87xx_i2c_remove,
+	.id_table = s5m87xx_i2c_id,
+};
+
+static int __init s5m87xx_i2c_init(void)
+{
+	return i2c_add_driver(&s5m87xx_i2c_driver);
+}
+
+subsys_initcall(s5m87xx_i2c_init);
+
+static void __exit s5m87xx_i2c_exit(void)
+{
+	i2c_del_driver(&s5m87xx_i2c_driver);
+}
+module_exit(s5m87xx_i2c_exit);
+
+MODULE_AUTHOR("Sangbeom Kim <sbkim73@samsung.com>");
+MODULE_DESCRIPTION("Core support for the S5M MFD");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/s5m87xx/s5m-core.h b/include/linux/mfd/s5m87xx/s5m-core.h
new file mode 100644
index 000000000000..a7480b57f92d
--- /dev/null
+++ b/include/linux/mfd/s5m87xx/s5m-core.h
@@ -0,0 +1,373 @@
+/*
+ * s5m-core.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd
+ *              http://www.samsung.com
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __LINUX_MFD_S5M_CORE_H
+#define __LINUX_MFD_S5M_CORE_H
+
+#define NUM_IRQ_REGS	4
+
+enum s5m_device_type {
+	S5M8751X,
+	S5M8763X,
+	S5M8767X,
+};
+
+/* S5M8767 registers */
+enum s5m8767_reg {
+	S5M8767_REG_ID,
+	S5M8767_REG_INT1,
+	S5M8767_REG_INT2,
+	S5M8767_REG_INT3,
+	S5M8767_REG_INT1M,
+	S5M8767_REG_INT2M,
+	S5M8767_REG_INT3M,
+	S5M8767_REG_STATUS1,
+	S5M8767_REG_STATUS2,
+	S5M8767_REG_STATUS3,
+	S5M8767_REG_CTRL1,
+	S5M8767_REG_CTRL2,
+	S5M8767_REG_LOWBAT1,
+	S5M8767_REG_LOWBAT2,
+	S5M8767_REG_BUCHG,
+	S5M8767_REG_DVSRAMP,
+	S5M8767_REG_DVSTIMER2 = 0x10,
+	S5M8767_REG_DVSTIMER3,
+	S5M8767_REG_DVSTIMER4,
+	S5M8767_REG_LDO1,
+	S5M8767_REG_LDO2,
+	S5M8767_REG_LDO3,
+	S5M8767_REG_LDO4,
+	S5M8767_REG_LDO5,
+	S5M8767_REG_LDO6,
+	S5M8767_REG_LDO7,
+	S5M8767_REG_LDO8,
+	S5M8767_REG_LDO9,
+	S5M8767_REG_LDO10,
+	S5M8767_REG_LDO11,
+	S5M8767_REG_LDO12,
+	S5M8767_REG_LDO13,
+	S5M8767_REG_LDO14 = 0x20,
+	S5M8767_REG_LDO15,
+	S5M8767_REG_LDO16,
+	S5M8767_REG_LDO17,
+	S5M8767_REG_LDO18,
+	S5M8767_REG_LDO19,
+	S5M8767_REG_LDO20,
+	S5M8767_REG_LDO21,
+	S5M8767_REG_LDO22,
+	S5M8767_REG_LDO23,
+	S5M8767_REG_LDO24,
+	S5M8767_REG_LDO25,
+	S5M8767_REG_LDO26,
+	S5M8767_REG_LDO27,
+	S5M8767_REG_LDO28,
+	S5M8767_REG_UVLO = 0x31,
+	S5M8767_REG_BUCK1CTRL1,
+	S5M8767_REG_BUCK1CTRL2,
+	S5M8767_REG_BUCK2CTRL,
+	S5M8767_REG_BUCK2DVS1,
+	S5M8767_REG_BUCK2DVS2,
+	S5M8767_REG_BUCK2DVS3,
+	S5M8767_REG_BUCK2DVS4,
+	S5M8767_REG_BUCK2DVS5,
+	S5M8767_REG_BUCK2DVS6,
+	S5M8767_REG_BUCK2DVS7,
+	S5M8767_REG_BUCK2DVS8,
+	S5M8767_REG_BUCK3CTRL,
+	S5M8767_REG_BUCK3DVS1,
+	S5M8767_REG_BUCK3DVS2,
+	S5M8767_REG_BUCK3DVS3,
+	S5M8767_REG_BUCK3DVS4,
+	S5M8767_REG_BUCK3DVS5,
+	S5M8767_REG_BUCK3DVS6,
+	S5M8767_REG_BUCK3DVS7,
+	S5M8767_REG_BUCK3DVS8,
+	S5M8767_REG_BUCK4CTRL,
+	S5M8767_REG_BUCK4DVS1,
+	S5M8767_REG_BUCK4DVS2,
+	S5M8767_REG_BUCK4DVS3,
+	S5M8767_REG_BUCK4DVS4,
+	S5M8767_REG_BUCK4DVS5,
+	S5M8767_REG_BUCK4DVS6,
+	S5M8767_REG_BUCK4DVS7,
+	S5M8767_REG_BUCK4DVS8,
+	S5M8767_REG_BUCK5CTRL1,
+	S5M8767_REG_BUCK5CTRL2,
+	S5M8767_REG_BUCK5CTRL3,
+	S5M8767_REG_BUCK5CTRL4,
+	S5M8767_REG_BUCK5CTRL5,
+	S5M8767_REG_BUCK6CTRL1,
+	S5M8767_REG_BUCK6CTRL2,
+	S5M8767_REG_BUCK7CTRL1,
+	S5M8767_REG_BUCK7CTRL2,
+	S5M8767_REG_BUCK8CTRL1,
+	S5M8767_REG_BUCK8CTRL2,
+	S5M8767_REG_BUCK9CTRL1,
+	S5M8767_REG_BUCK9CTRL2,
+	S5M8767_REG_LDO1CTRL,
+	S5M8767_REG_LDO2_1CTRL,
+	S5M8767_REG_LDO2_2CTRL,
+	S5M8767_REG_LDO2_3CTRL,
+	S5M8767_REG_LDO2_4CTRL,
+	S5M8767_REG_LDO3CTRL,
+	S5M8767_REG_LDO4CTRL,
+	S5M8767_REG_LDO5CTRL,
+	S5M8767_REG_LDO6CTRL,
+	S5M8767_REG_LDO7CTRL,
+	S5M8767_REG_LDO8CTRL,
+	S5M8767_REG_LDO9CTRL,
+	S5M8767_REG_LDO10CTRL,
+	S5M8767_REG_LDO11CTRL,
+	S5M8767_REG_LDO12CTRL,
+	S5M8767_REG_LDO13CTRL,
+	S5M8767_REG_LDO14CTRL,
+	S5M8767_REG_LDO15CTRL,
+	S5M8767_REG_LDO16CTRL,
+	S5M8767_REG_LDO17CTRL,
+	S5M8767_REG_LDO18CTRL,
+	S5M8767_REG_LDO19CTRL,
+	S5M8767_REG_LDO20CTRL,
+	S5M8767_REG_LDO21CTRL,
+	S5M8767_REG_LDO22CTRL,
+	S5M8767_REG_LDO23CTRL,
+	S5M8767_REG_LDO24CTRL,
+	S5M8767_REG_LDO25CTRL,
+	S5M8767_REG_LDO26CTRL,
+	S5M8767_REG_LDO27CTRL,
+	S5M8767_REG_LDO28CTRL,
+};
+
+/* S5M8763 registers */
+enum s5m8763_reg {
+	S5M8763_REG_IRQ1,
+	S5M8763_REG_IRQ2,
+	S5M8763_REG_IRQ3,
+	S5M8763_REG_IRQ4,
+	S5M8763_REG_IRQM1,
+	S5M8763_REG_IRQM2,
+	S5M8763_REG_IRQM3,
+	S5M8763_REG_IRQM4,
+	S5M8763_REG_STATUS1,
+	S5M8763_REG_STATUS2,
+	S5M8763_REG_STATUSM1,
+	S5M8763_REG_STATUSM2,
+	S5M8763_REG_CHGR1,
+	S5M8763_REG_CHGR2,
+	S5M8763_REG_LDO_ACTIVE_DISCHARGE1,
+	S5M8763_REG_LDO_ACTIVE_DISCHARGE2,
+	S5M8763_REG_BUCK_ACTIVE_DISCHARGE3,
+	S5M8763_REG_ONOFF1,
+	S5M8763_REG_ONOFF2,
+	S5M8763_REG_ONOFF3,
+	S5M8763_REG_ONOFF4,
+	S5M8763_REG_BUCK1_VOLTAGE1,
+	S5M8763_REG_BUCK1_VOLTAGE2,
+	S5M8763_REG_BUCK1_VOLTAGE3,
+	S5M8763_REG_BUCK1_VOLTAGE4,
+	S5M8763_REG_BUCK2_VOLTAGE1,
+	S5M8763_REG_BUCK2_VOLTAGE2,
+	S5M8763_REG_BUCK3,
+	S5M8763_REG_BUCK4,
+	S5M8763_REG_LDO1_LDO2,
+	S5M8763_REG_LDO3,
+	S5M8763_REG_LDO4,
+	S5M8763_REG_LDO5,
+	S5M8763_REG_LDO6,
+	S5M8763_REG_LDO7,
+	S5M8763_REG_LDO7_LDO8,
+	S5M8763_REG_LDO9_LDO10,
+	S5M8763_REG_LDO11,
+	S5M8763_REG_LDO12,
+	S5M8763_REG_LDO13,
+	S5M8763_REG_LDO14,
+	S5M8763_REG_LDO15,
+	S5M8763_REG_LDO16,
+	S5M8763_REG_BKCHR,
+	S5M8763_REG_LBCNFG1,
+	S5M8763_REG_LBCNFG2,
+};
+
+enum s5m8767_irq {
+	S5M8767_IRQ_PWRR,
+	S5M8767_IRQ_PWRF,
+	S5M8767_IRQ_PWR1S,
+	S5M8767_IRQ_JIGR,
+	S5M8767_IRQ_JIGF,
+	S5M8767_IRQ_LOWBAT2,
+	S5M8767_IRQ_LOWBAT1,
+
+	S5M8767_IRQ_MRB,
+	S5M8767_IRQ_DVSOK2,
+	S5M8767_IRQ_DVSOK3,
+	S5M8767_IRQ_DVSOK4,
+
+	S5M8767_IRQ_RTC60S,
+	S5M8767_IRQ_RTCA1,
+	S5M8767_IRQ_RTCA2,
+	S5M8767_IRQ_SMPL,
+	S5M8767_IRQ_RTC1S,
+	S5M8767_IRQ_WTSR,
+
+	S5M8767_IRQ_NR,
+};
+
+#define S5M8767_IRQ_PWRR_MASK		(1 << 0)
+#define S5M8767_IRQ_PWRF_MASK		(1 << 1)
+#define S5M8767_IRQ_PWR1S_MASK		(1 << 3)
+#define S5M8767_IRQ_JIGR_MASK		(1 << 4)
+#define S5M8767_IRQ_JIGF_MASK		(1 << 5)
+#define S5M8767_IRQ_LOWBAT2_MASK	(1 << 6)
+#define S5M8767_IRQ_LOWBAT1_MASK	(1 << 7)
+
+#define S5M8767_IRQ_MRB_MASK		(1 << 2)
+#define S5M8767_IRQ_DVSOK2_MASK		(1 << 3)
+#define S5M8767_IRQ_DVSOK3_MASK		(1 << 4)
+#define S5M8767_IRQ_DVSOK4_MASK		(1 << 5)
+
+#define S5M8767_IRQ_RTC60S_MASK		(1 << 0)
+#define S5M8767_IRQ_RTCA1_MASK		(1 << 1)
+#define S5M8767_IRQ_RTCA2_MASK		(1 << 2)
+#define S5M8767_IRQ_SMPL_MASK		(1 << 3)
+#define S5M8767_IRQ_RTC1S_MASK		(1 << 4)
+#define S5M8767_IRQ_WTSR_MASK		(1 << 5)
+
+enum s5m8763_irq {
+	S5M8763_IRQ_DCINF,
+	S5M8763_IRQ_DCINR,
+	S5M8763_IRQ_JIGF,
+	S5M8763_IRQ_JIGR,
+	S5M8763_IRQ_PWRONF,
+	S5M8763_IRQ_PWRONR,
+
+	S5M8763_IRQ_WTSREVNT,
+	S5M8763_IRQ_SMPLEVNT,
+	S5M8763_IRQ_ALARM1,
+	S5M8763_IRQ_ALARM0,
+
+	S5M8763_IRQ_ONKEY1S,
+	S5M8763_IRQ_TOPOFFR,
+	S5M8763_IRQ_DCINOVPR,
+	S5M8763_IRQ_CHGRSTF,
+	S5M8763_IRQ_DONER,
+	S5M8763_IRQ_CHGFAULT,
+
+	S5M8763_IRQ_LOBAT1,
+	S5M8763_IRQ_LOBAT2,
+
+	S5M8763_IRQ_NR,
+};
+
+#define S5M8763_IRQ_DCINF_MASK		(1 << 2)
+#define S5M8763_IRQ_DCINR_MASK		(1 << 3)
+#define S5M8763_IRQ_JIGF_MASK		(1 << 4)
+#define S5M8763_IRQ_JIGR_MASK		(1 << 5)
+#define S5M8763_IRQ_PWRONF_MASK		(1 << 6)
+#define S5M8763_IRQ_PWRONR_MASK		(1 << 7)
+
+#define S5M8763_IRQ_WTSREVNT_MASK	(1 << 0)
+#define S5M8763_IRQ_SMPLEVNT_MASK	(1 << 1)
+#define S5M8763_IRQ_ALARM1_MASK		(1 << 2)
+#define S5M8763_IRQ_ALARM0_MASK		(1 << 3)
+
+#define S5M8763_IRQ_ONKEY1S_MASK	(1 << 0)
+#define S5M8763_IRQ_TOPOFFR_MASK	(1 << 2)
+#define S5M8763_IRQ_DCINOVPR_MASK	(1 << 3)
+#define S5M8763_IRQ_CHGRSTF_MASK	(1 << 4)
+#define S5M8763_IRQ_DONER_MASK		(1 << 5)
+#define S5M8763_IRQ_CHGFAULT_MASK	(1 << 7)
+
+#define S5M8763_IRQ_LOBAT1_MASK		(1 << 0)
+#define S5M8763_IRQ_LOBAT2_MASK		(1 << 1)
+
+#define S5M8763_ENRAMP                  (1 << 4)
+
+/**
+ * struct s5m87xx_dev - s5m87xx master device for sub-drivers
+ * @dev: master device of the chip (can be used to access platform data)
+ * @i2c: i2c client private data for regulator
+ * @rtc: i2c client private data for rtc
+ * @iolock: mutex for serializing io access
+ * @irqlock: mutex for buslock
+ * @irq_base: base IRQ number for s5m87xx, required for IRQs
+ * @irq: generic IRQ number for s5m87xx
+ * @ono: power onoff IRQ number for s5m87xx
+ * @irq_masks_cur: currently active value
+ * @irq_masks_cache: cached hardware value
+ * @type: indicate which s5m87xx "variant" is used
+ */
+struct s5m87xx_dev {
+	struct device *dev;
+	struct regmap *regmap;
+	struct i2c_client *i2c;
+	struct i2c_client *rtc;
+	struct mutex iolock;
+	struct mutex irqlock;
+
+	int device_type;
+	int irq_base;
+	int irq;
+	int ono;
+	u8 irq_masks_cur[NUM_IRQ_REGS];
+	u8 irq_masks_cache[NUM_IRQ_REGS];
+	int type;
+	bool wakeup;
+};
+
+int s5m_irq_init(struct s5m87xx_dev *s5m87xx);
+void s5m_irq_exit(struct s5m87xx_dev *s5m87xx);
+int s5m_irq_resume(struct s5m87xx_dev *s5m87xx);
+
+extern int s5m_reg_read(struct s5m87xx_dev *s5m87xx, u8 reg, void *dest);
+extern int s5m_bulk_read(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf);
+extern int s5m_reg_write(struct s5m87xx_dev *s5m87xx, u8 reg, u8 value);
+extern int s5m_bulk_write(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf);
+extern int s5m_reg_update(struct s5m87xx_dev *s5m87xx, u8 reg, u8 val, u8 mask);
+
+struct s5m_platform_data {
+	struct s5m_regulator_data	*regulators;
+	int				device_type;
+	int				num_regulators;
+
+	int				irq_base;
+	int 				(*cfg_pmic_irq)(void);
+
+	int				ono;
+	bool				wakeup;
+	bool				buck_voltage_lock;
+
+	int				buck_gpios[3];
+	int				buck2_voltage[8];
+	bool				buck2_gpiodvs;
+	int				buck3_voltage[8];
+	bool				buck3_gpiodvs;
+	int				buck4_voltage[8];
+	bool				buck4_gpiodvs;
+
+	int				buck_set1;
+	int				buck_set2;
+	int				buck_set3;
+	int				buck2_enable;
+	int				buck3_enable;
+	int				buck4_enable;
+	int				buck_default_idx;
+	int				buck2_default_idx;
+	int				buck3_default_idx;
+	int				buck4_default_idx;
+
+	int                             buck_ramp_delay;
+	bool                            buck2_ramp_enable;
+	bool                            buck3_ramp_enable;
+	bool                            buck4_ramp_enable;
+};
+
+#endif /*  __LINUX_MFD_S5M_CORE_H */
diff --git a/include/linux/mfd/s5m87xx/s5m-pmic.h b/include/linux/mfd/s5m87xx/s5m-pmic.h
new file mode 100644
index 000000000000..a72a5d27e62e
--- /dev/null
+++ b/include/linux/mfd/s5m87xx/s5m-pmic.h
@@ -0,0 +1,100 @@
+/* s5m-pmic.h
+ *
+ * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#ifndef __LINUX_MFD_S5M_PMIC_H
+#define __LINUX_MFD_S5M_PMIC_H
+
+#include <linux/regulator/machine.h>
+
+/* S5M8767 regulator ids */
+enum s5m8767_regulators {
+	S5M8767_LDO1,
+	S5M8767_LDO2,
+	S5M8767_LDO3,
+	S5M8767_LDO4,
+	S5M8767_LDO5,
+	S5M8767_LDO6,
+	S5M8767_LDO7,
+	S5M8767_LDO8,
+	S5M8767_LDO9,
+	S5M8767_LDO10,
+	S5M8767_LDO11,
+	S5M8767_LDO12,
+	S5M8767_LDO13,
+	S5M8767_LDO14,
+	S5M8767_LDO15,
+	S5M8767_LDO16,
+	S5M8767_LDO17,
+	S5M8767_LDO18,
+	S5M8767_LDO19,
+	S5M8767_LDO20,
+	S5M8767_LDO21,
+	S5M8767_LDO22,
+	S5M8767_LDO23,
+	S5M8767_LDO24,
+	S5M8767_LDO25,
+	S5M8767_LDO26,
+	S5M8767_LDO27,
+	S5M8767_LDO28,
+	S5M8767_BUCK1,
+	S5M8767_BUCK2,
+	S5M8767_BUCK3,
+	S5M8767_BUCK4,
+	S5M8767_BUCK5,
+	S5M8767_BUCK6,
+	S5M8767_BUCK7,
+	S5M8767_BUCK8,
+	S5M8767_BUCK9,
+	S5M8767_AP_EN32KHZ,
+	S5M8767_CP_EN32KHZ,
+
+	S5M8767_REG_MAX,
+};
+
+/* S5M8763 regulator ids */
+enum s5m8763_regulators {
+	S5M8763_LDO1,
+	S5M8763_LDO2,
+	S5M8763_LDO3,
+	S5M8763_LDO4,
+	S5M8763_LDO5,
+	S5M8763_LDO6,
+	S5M8763_LDO7,
+	S5M8763_LDO8,
+	S5M8763_LDO9,
+	S5M8763_LDO10,
+	S5M8763_LDO11,
+	S5M8763_LDO12,
+	S5M8763_LDO13,
+	S5M8763_LDO14,
+	S5M8763_LDO15,
+	S5M8763_LDO16,
+	S5M8763_BUCK1,
+	S5M8763_BUCK2,
+	S5M8763_BUCK3,
+	S5M8763_BUCK4,
+	S5M8763_AP_EN32KHZ,
+	S5M8763_CP_EN32KHZ,
+	S5M8763_ENCHGVI,
+	S5M8763_ESAFEUSB1,
+	S5M8763_ESAFEUSB2,
+};
+
+/**
+ * struct s5m_regulator_data - regulator data
+ * @id: regulator id
+ * @initdata: regulator init data (constraints, supplies, ...)
+ */
+struct s5m_regulator_data {
+	int				id;
+	struct regulator_init_data	*initdata;
+};
+
+#endif /*  __LINUX_MFD_S5M_PMIC_H */
diff --git a/include/linux/mfd/s5m87xx/s5m-rtc.h b/include/linux/mfd/s5m87xx/s5m-rtc.h
new file mode 100644
index 000000000000..6ce8da264cec
--- /dev/null
+++ b/include/linux/mfd/s5m87xx/s5m-rtc.h
@@ -0,0 +1,84 @@
+/*
+ * s5m-rtc.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd
+ *              http://www.samsung.com
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __LINUX_MFD_S5M_RTC_H
+#define __LINUX_MFD_S5M_RTC_H
+
+enum s5m87xx_rtc_reg {
+	S5M87XX_RTC_SEC,
+	S5M87XX_RTC_MIN,
+	S5M87XX_RTC_HOUR,
+	S5M87XX_RTC_WEEKDAY,
+	S5M87XX_RTC_DATE,
+	S5M87XX_RTC_MONTH,
+	S5M87XX_RTC_YEAR1,
+	S5M87XX_RTC_YEAR2,
+	S5M87XX_ALARM0_SEC,
+	S5M87XX_ALARM0_MIN,
+	S5M87XX_ALARM0_HOUR,
+	S5M87XX_ALARM0_WEEKDAY,
+	S5M87XX_ALARM0_DATE,
+	S5M87XX_ALARM0_MONTH,
+	S5M87XX_ALARM0_YEAR1,
+	S5M87XX_ALARM0_YEAR2,
+	S5M87XX_ALARM1_SEC,
+	S5M87XX_ALARM1_MIN,
+	S5M87XX_ALARM1_HOUR,
+	S5M87XX_ALARM1_WEEKDAY,
+	S5M87XX_ALARM1_DATE,
+	S5M87XX_ALARM1_MONTH,
+	S5M87XX_ALARM1_YEAR1,
+	S5M87XX_ALARM1_YEAR2,
+	S5M87XX_ALARM0_CONF,
+	S5M87XX_ALARM1_CONF,
+	S5M87XX_RTC_STATUS,
+	S5M87XX_WTSR_SMPL_CNTL,
+	S5M87XX_RTC_UDR_CON,
+};
+
+#define RTC_I2C_ADDR		(0x0C >> 1)
+
+#define HOUR_12			(1 << 7)
+#define HOUR_AMPM		(1 << 6)
+#define HOUR_PM			(1 << 5)
+#define ALARM0_STATUS		(1 << 1)
+#define ALARM1_STATUS		(1 << 2)
+#define UPDATE_AD		(1 << 0)
+
+/* RTC Control Register */
+#define BCD_EN_SHIFT		0
+#define BCD_EN_MASK		(1 << BCD_EN_SHIFT)
+#define MODEL24_SHIFT		1
+#define MODEL24_MASK		(1 << MODEL24_SHIFT)
+/* RTC Update Register1 */
+#define RTC_UDR_SHIFT		0
+#define RTC_UDR_MASK		(1 << RTC_UDR_SHIFT)
+/* RTC Hour register */
+#define HOUR_PM_SHIFT		6
+#define HOUR_PM_MASK		(1 << HOUR_PM_SHIFT)
+/* RTC Alarm Enable */
+#define ALARM_ENABLE_SHIFT	7
+#define ALARM_ENABLE_MASK	(1 << ALARM_ENABLE_SHIFT)
+
+enum {
+	RTC_SEC = 0,
+	RTC_MIN,
+	RTC_HOUR,
+	RTC_WEEKDAY,
+	RTC_DATE,
+	RTC_MONTH,
+	RTC_YEAR1,
+	RTC_YEAR2,
+};
+
+#endif /*  __LINUX_MFD_S5M_RTC_H */
-- 
cgit v1.2.3


From ba74e80ebaf8209cb553eb2195b26302270cfa42 Mon Sep 17 00:00:00 2001
From: Kevin Liu <kliu5@marvell.com>
Date: Wed, 4 Jan 2012 15:14:24 +0800
Subject: mfd: Add pm ops to max8925

Signed-off-by: Kevin Liu <kliu5@marvell.com>
Signed-off-by: Haojian Zhuang <haojian.zhuang@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/max8925-i2c.c   | 27 +++++++++++++++++++++++++++
 include/linux/mfd/max8925.h |  2 ++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/max8925-i2c.c b/drivers/mfd/max8925-i2c.c
index 0219115e00c7..d9e4b36edee9 100644
--- a/drivers/mfd/max8925-i2c.c
+++ b/drivers/mfd/max8925-i2c.c
@@ -161,6 +161,8 @@ static int __devinit max8925_probe(struct i2c_client *client,
 	chip->adc = i2c_new_dummy(chip->i2c->adapter, ADC_I2C_ADDR);
 	i2c_set_clientdata(chip->adc, chip);
 
+	device_init_wakeup(&client->dev, 1);
+
 	max8925_device_init(chip, pdata);
 
 	return 0;
@@ -177,10 +179,35 @@ static int __devexit max8925_remove(struct i2c_client *client)
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int max8925_suspend(struct device *dev)
+{
+	struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+	struct max8925_chip *chip = i2c_get_clientdata(client);
+
+	if (device_may_wakeup(dev) && chip->wakeup_flag)
+		enable_irq_wake(chip->core_irq);
+	return 0;
+}
+
+static int max8925_resume(struct device *dev)
+{
+	struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+	struct max8925_chip *chip = i2c_get_clientdata(client);
+
+	if (device_may_wakeup(dev) && chip->wakeup_flag)
+		disable_irq_wake(chip->core_irq);
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(max8925_pm_ops, max8925_suspend, max8925_resume);
+
 static struct i2c_driver max8925_driver = {
 	.driver	= {
 		.name	= "max8925",
 		.owner	= THIS_MODULE,
+		.pm     = &max8925_pm_ops,
 	},
 	.probe		= max8925_probe,
 	.remove		= __devexit_p(max8925_remove),
diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index 5259dfe8c585..daaba00f0bc5 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h
@@ -206,6 +206,8 @@ struct max8925_chip {
 	int			irq_base;
 	int			core_irq;
 	int			tsc_irq;
+
+	unsigned int            wakeup_flag;
 };
 
 struct max8925_backlight_pdata {
-- 
cgit v1.2.3


From f78a26f55b2438c439609fc90b473f7f08f5b697 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 14 Dec 2011 01:01:05 +0900
Subject: sh: pfc: Variable bitfield width config register support

Add support for variable config reg hardware by adding
the macro PINMUX_CFG_REG_VAR(). The width of each bitfield
needs to be passed to the macro, and the correct space must
be consumed by each bitfield in the enum table following the
macro. Data registers still need to have fixed bitfields.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/pfc.c       | 44 +++++++++++++++++++++++++++++++++-----------
 include/linux/sh_pfc.h |  9 ++++++++-
 2 files changed, 41 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/pfc.c b/drivers/sh/pfc.c
index 5481d19518f9..f975f4a33439 100644
--- a/drivers/sh/pfc.c
+++ b/drivers/sh/pfc.c
@@ -174,10 +174,19 @@ static void config_reg_helper(struct pinmux_info *gpioc,
 			      unsigned long *maskp,
 			      unsigned long *posp)
 {
+	int k;
+
 	*mapped_regp = pfc_phys_to_virt(gpioc, crp->reg);
 
-	*maskp = (1 << crp->field_width) - 1;
-	*posp = crp->reg_width - ((in_pos + 1) * crp->field_width);
+	if (crp->field_width) {
+		*maskp = (1 << crp->field_width) - 1;
+		*posp = crp->reg_width - ((in_pos + 1) * crp->field_width);
+	} else {
+		*maskp = (1 << crp->var_field_width[in_pos]) - 1;
+		*posp = crp->reg_width;
+		for (k = 0; k <= in_pos; k++)
+			*posp -= crp->var_field_width[k];
+	}
 }
 
 static int read_config_reg(struct pinmux_info *gpioc,
@@ -303,8 +312,8 @@ static int get_config_reg(struct pinmux_info *gpioc, pinmux_enum_t enum_id,
 			  unsigned long **cntp)
 {
 	struct pinmux_cfg_reg *config_reg;
-	unsigned long r_width, f_width;
-	int k, n;
+	unsigned long r_width, f_width, curr_width, ncomb;
+	int k, m, n, pos, bit_pos;
 
 	k = 0;
 	while (1) {
@@ -315,14 +324,27 @@ static int get_config_reg(struct pinmux_info *gpioc, pinmux_enum_t enum_id,
 
 		if (!r_width)
 			break;
-		for (n = 0; n < (r_width / f_width) * (1 << f_width); n++) {
-			if (config_reg->enum_ids[n] == enum_id) {
-				*crp = config_reg;
-				*fieldp = n / (1 << f_width);
-				*valuep = n % (1 << f_width);
-				*cntp = &config_reg->cnt[n / (1 << f_width)];
-				return 0;
+
+		pos = 0;
+		m = 0;
+		for (bit_pos = 0; bit_pos < r_width; bit_pos += curr_width) {
+			if (f_width)
+				curr_width = f_width;
+			else
+				curr_width = config_reg->var_field_width[m];
+
+			ncomb = 1 << curr_width;
+			for (n = 0; n < ncomb; n++) {
+				if (config_reg->enum_ids[pos + n] == enum_id) {
+					*crp = config_reg;
+					*fieldp = m;
+					*valuep = n;
+					*cntp = &config_reg->cnt[m];
+					return 0;
+				}
 			}
+			pos += ncomb;
+			m++;
 		}
 		k++;
 	}
diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
index 91666a58529d..84538c42d64a 100644
--- a/include/linux/sh_pfc.h
+++ b/include/linux/sh_pfc.h
@@ -45,12 +45,19 @@ struct pinmux_cfg_reg {
 	unsigned long reg, reg_width, field_width;
 	unsigned long *cnt;
 	pinmux_enum_t *enum_ids;
+	unsigned long *var_field_width;
 };
 
 #define PINMUX_CFG_REG(name, r, r_width, f_width) \
 	.reg = r, .reg_width = r_width, .field_width = f_width,		\
 	.cnt = (unsigned long [r_width / f_width]) {}, \
-	.enum_ids = (pinmux_enum_t [(r_width / f_width) * (1 << f_width)]) \
+	.enum_ids = (pinmux_enum_t [(r_width / f_width) * (1 << f_width)])
+
+#define PINMUX_CFG_REG_VAR(name, r, r_width, var_fw0, var_fwn...) \
+	.reg = r, .reg_width = r_width,	\
+	.cnt = (unsigned long [r_width]) {}, \
+	.var_field_width = (unsigned long [r_width]) { var_fw0, var_fwn, 0 }, \
+	.enum_ids = (pinmux_enum_t [])
 
 struct pinmux_data_reg {
 	unsigned long reg, reg_width, reg_shadow;
-- 
cgit v1.2.3


From e499ada829cf769ac6f16627cd9f09b855a7fd6d Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 14 Dec 2011 01:01:14 +0900
Subject: sh: pfc: Unlock register support

Add PFC support for a 32-bit unlock register. Needed to
drive the r8a7779 PFC that comes with a funky PMMR register.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/pfc.c       | 22 ++++++++++------------
 include/linux/sh_pfc.h |  2 ++
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/pfc.c b/drivers/sh/pfc.c
index f975f4a33439..522c6c46d1be 100644
--- a/drivers/sh/pfc.c
+++ b/drivers/sh/pfc.c
@@ -210,7 +210,7 @@ static void write_config_reg(struct pinmux_info *gpioc,
 			     unsigned long field, unsigned long value)
 {
 	void __iomem *mapped_reg;
-	unsigned long mask, pos;
+	unsigned long mask, pos, data;
 
 	config_reg_helper(gpioc, crp, field, &mapped_reg, &mask, &pos);
 
@@ -221,17 +221,15 @@ static void write_config_reg(struct pinmux_info *gpioc,
 	mask = ~(mask << pos);
 	value = value << pos;
 
-	switch (crp->reg_width) {
-	case 8:
-		iowrite8((ioread8(mapped_reg) & mask) | value, mapped_reg);
-		break;
-	case 16:
-		iowrite16((ioread16(mapped_reg) & mask) | value, mapped_reg);
-		break;
-	case 32:
-		iowrite32((ioread32(mapped_reg) & mask) | value, mapped_reg);
-		break;
-	}
+	data = gpio_read_raw_reg(mapped_reg, crp->reg_width);
+	data &= mask;
+	data |= value;
+
+	if (gpioc->unlock_reg)
+		gpio_write_raw_reg(pfc_phys_to_virt(gpioc, gpioc->unlock_reg),
+				   32, ~data);
+
+	gpio_write_raw_reg(mapped_reg, crp->reg_width, data);
 }
 
 static int setup_data_reg(struct pinmux_info *gpioc, unsigned gpio)
diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
index 84538c42d64a..5c15aed9c4b2 100644
--- a/include/linux/sh_pfc.h
+++ b/include/linux/sh_pfc.h
@@ -116,6 +116,8 @@ struct pinmux_info {
 	unsigned int num_resources;
 	struct pfc_window *window;
 
+	unsigned long unlock_reg;
+
 	struct gpio_chip chip;
 };
 
-- 
cgit v1.2.3


From c1257b4798d48b73ad1a9ca359504cd49caefa0d Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Wed, 2 Nov 2011 13:34:42 -0700
Subject: mtd: nand: add Macronix manufacturer

Macronix is producing SLC NAND MX30LF1208AA, so add their manufacturer
code to the manufacturer lists.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_ids.c | 1 +
 include/linux/mtd/nand.h    | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index 00cf1b0d6053..56c688dafe92 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -176,6 +176,7 @@ struct nand_manufacturers nand_manuf_ids[] = {
 	{NAND_MFR_HYNIX, "Hynix"},
 	{NAND_MFR_MICRON, "Micron"},
 	{NAND_MFR_AMD, "AMD"},
+	{NAND_MFR_MACRONIX, "Macronix"},
 	{0x0, "Unknown"}
 };
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 904131bab501..63b5a8b6dfbd 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -555,6 +555,7 @@ struct nand_chip {
 #define NAND_MFR_HYNIX		0xad
 #define NAND_MFR_MICRON		0x2c
 #define NAND_MFR_AMD		0x01
+#define NAND_MFR_MACRONIX	0xc2
 
 /**
  * struct nand_flash_dev - NAND Flash Device ID Structure
-- 
cgit v1.2.3


From 8e987465a137d4824710e02550f06aa891c9b865 Mon Sep 17 00:00:00 2001
From: Aaron Sierra <asierra@xes-inc.com>
Date: Mon, 14 Nov 2011 18:44:34 -0600
Subject: mtd: cfi: Allow per-mapping CFI device endianness

This patch allows each CFI device map to use its own endianness. The
globally defined CFI endianness (CONFIG_MTD_CFI_NOSWAP,
CONFIG_MTD_CFI_BE_BYTE_SWAP or CONFIG_MTD_CFI_LE_BYTE_SWAP) becomes the
default value which can be overridden by a driver for a particular device.

Signed-off-by: Aaron Sierra <asierra@xes-inc.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_cmdset_0020.c |  5 ++-
 include/linux/mtd/cfi.h             | 16 ++++----
 include/linux/mtd/cfi_endian.h      | 76 ++++++++++++++-----------------------
 include/linux/mtd/map.h             |  1 +
 4 files changed, 41 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c
index 179814a95f3a..666c52f8bf8d 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c
@@ -139,8 +139,9 @@ struct mtd_info *cfi_cmdset_0020(struct map_info *map, int primary)
 		}
 
 		/* Do some byteswapping if necessary */
-		extp->FeatureSupport = cfi32_to_cpu(extp->FeatureSupport);
-		extp->BlkStatusRegMask = cfi32_to_cpu(extp->BlkStatusRegMask);
+		extp->FeatureSupport = cfi32_to_cpu(map, extp->FeatureSupport);
+		extp->BlkStatusRegMask = cfi32_to_cpu(map,
+						extp->BlkStatusRegMask);
 
 #ifdef DEBUG_CFI_FEATURES
 		/* Tell the user about it in lots of lovely detail */
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index d24925492972..d5d2ec6494bb 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -354,10 +354,10 @@ static inline map_word cfi_build_cmd(u_long cmd, struct map_info *map, struct cf
 		onecmd = cmd;
 		break;
 	case 2:
-		onecmd = cpu_to_cfi16(cmd);
+		onecmd = cpu_to_cfi16(map, cmd);
 		break;
 	case 4:
-		onecmd = cpu_to_cfi32(cmd);
+		onecmd = cpu_to_cfi32(map, cmd);
 		break;
 	}
 
@@ -437,10 +437,10 @@ static inline unsigned long cfi_merge_status(map_word val, struct map_info *map,
 	case 1:
 		break;
 	case 2:
-		res = cfi16_to_cpu(res);
+		res = cfi16_to_cpu(map, res);
 		break;
 	case 4:
-		res = cfi32_to_cpu(res);
+		res = cfi32_to_cpu(map, res);
 		break;
 	default: BUG();
 	}
@@ -480,12 +480,12 @@ static inline uint8_t cfi_read_query(struct map_info *map, uint32_t addr)
 	if (map_bankwidth_is_1(map)) {
 		return val.x[0];
 	} else if (map_bankwidth_is_2(map)) {
-		return cfi16_to_cpu(val.x[0]);
+		return cfi16_to_cpu(map, val.x[0]);
 	} else {
 		/* No point in a 64-bit byteswap since that would just be
 		   swapping the responses from different chips, and we are
 		   only interested in one chip (a representative sample) */
-		return cfi32_to_cpu(val.x[0]);
+		return cfi32_to_cpu(map, val.x[0]);
 	}
 }
 
@@ -496,12 +496,12 @@ static inline uint16_t cfi_read_query16(struct map_info *map, uint32_t addr)
 	if (map_bankwidth_is_1(map)) {
 		return val.x[0] & 0xff;
 	} else if (map_bankwidth_is_2(map)) {
-		return cfi16_to_cpu(val.x[0]);
+		return cfi16_to_cpu(map, val.x[0]);
 	} else {
 		/* No point in a 64-bit byteswap since that would just be
 		   swapping the responses from different chips, and we are
 		   only interested in one chip (a representative sample) */
-		return cfi32_to_cpu(val.x[0]);
+		return cfi32_to_cpu(map, val.x[0]);
 	}
 }
 
diff --git a/include/linux/mtd/cfi_endian.h b/include/linux/mtd/cfi_endian.h
index 51cc3f5917a8..b97a625071f8 100644
--- a/include/linux/mtd/cfi_endian.h
+++ b/include/linux/mtd/cfi_endian.h
@@ -19,53 +19,35 @@
 
 #include <asm/byteorder.h>
 
-#ifndef CONFIG_MTD_CFI_ADV_OPTIONS
-
-#define CFI_HOST_ENDIAN
-
-#else
-
-#ifdef CONFIG_MTD_CFI_NOSWAP
-#define CFI_HOST_ENDIAN
-#endif
-
-#ifdef CONFIG_MTD_CFI_LE_BYTE_SWAP
-#define CFI_LITTLE_ENDIAN
-#endif
-
-#ifdef CONFIG_MTD_CFI_BE_BYTE_SWAP
-#define CFI_BIG_ENDIAN
-#endif
-
-#endif
-
-#if defined(CFI_LITTLE_ENDIAN)
-#define cpu_to_cfi8(x) (x)
-#define cfi8_to_cpu(x) (x)
-#define cpu_to_cfi16(x) cpu_to_le16(x)
-#define cpu_to_cfi32(x) cpu_to_le32(x)
-#define cpu_to_cfi64(x) cpu_to_le64(x)
-#define cfi16_to_cpu(x) le16_to_cpu(x)
-#define cfi32_to_cpu(x) le32_to_cpu(x)
-#define cfi64_to_cpu(x) le64_to_cpu(x)
-#elif defined (CFI_BIG_ENDIAN)
-#define cpu_to_cfi8(x) (x)
-#define cfi8_to_cpu(x) (x)
-#define cpu_to_cfi16(x) cpu_to_be16(x)
-#define cpu_to_cfi32(x) cpu_to_be32(x)
-#define cpu_to_cfi64(x) cpu_to_be64(x)
-#define cfi16_to_cpu(x) be16_to_cpu(x)
-#define cfi32_to_cpu(x) be32_to_cpu(x)
-#define cfi64_to_cpu(x) be64_to_cpu(x)
-#elif defined (CFI_HOST_ENDIAN)
-#define cpu_to_cfi8(x) (x)
-#define cfi8_to_cpu(x) (x)
-#define cpu_to_cfi16(x) (x)
-#define cpu_to_cfi32(x) (x)
-#define cpu_to_cfi64(x) (x)
-#define cfi16_to_cpu(x) (x)
-#define cfi32_to_cpu(x) (x)
-#define cfi64_to_cpu(x) (x)
+#define CFI_HOST_ENDIAN 1
+#define CFI_LITTLE_ENDIAN 2
+#define CFI_BIG_ENDIAN 3
+
+#if !defined(CONFIG_MTD_CFI_ADV_OPTIONS) || defined(CONFIG_MTD_CFI_NOSWAP)
+#define CFI_DEFAULT_ENDIAN CFI_HOST_ENDIAN
+#elif defined(CONFIG_MTD_CFI_LE_BYTE_SWAP)
+#define CFI_DEFAULT_ENDIAN CFI_LITTLE_ENDIAN
+#elif defined(CONFIG_MTD_CFI_BE_BYTE_SWAP)
+#define CFI_DEFAULT_ENDIAN CFI_BIG_ENDIAN
 #else
 #error No CFI endianness defined
 #endif
+
+#define cfi_default(s) ((s)?:CFI_DEFAULT_ENDIAN)
+#define cfi_be(s) (cfi_default(s) == CFI_BIG_ENDIAN)
+#define cfi_le(s) (cfi_default(s) == CFI_LITTLE_ENDIAN)
+#define cfi_host(s) (cfi_default(s) == CFI_HOST_ENDIAN)
+
+#define cpu_to_cfi8(map, x) (x)
+#define cfi8_to_cpu(map, x) (x)
+#define cpu_to_cfi16(map, x) _cpu_to_cfi(16, (map)->swap, (x))
+#define cpu_to_cfi32(map, x) _cpu_to_cfi(32, (map)->swap, (x))
+#define cpu_to_cfi64(map, x) _cpu_to_cfi(64, (map)->swap, (x))
+#define cfi16_to_cpu(map, x) _cfi_to_cpu(16, (map)->swap, (x))
+#define cfi32_to_cpu(map, x) _cfi_to_cpu(32, (map)->swap, (x))
+#define cfi64_to_cpu(map, x) _cfi_to_cpu(64, (map)->swap, (x))
+
+#define _cpu_to_cfi(w, s, x) (cfi_host(s)?(x):_swap_to_cfi(w, s, x))
+#define _cfi_to_cpu(w, s, x) (cfi_host(s)?(x):_swap_to_cpu(w, s, x))
+#define _swap_to_cfi(w, s, x) (cfi_be(s)?cpu_to_be##w(x):cpu_to_le##w(x))
+#define _swap_to_cpu(w, s, x) (cfi_be(s)?be##w##_to_cpu(x):le##w##_to_cpu(x))
diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index a9e6ba46865e..1132410f14c6 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -214,6 +214,7 @@ struct map_info {
 	void __iomem *virt;
 	void *cached;
 
+	int swap; /* this mapping's byte-swapping requirement */
 	int bankwidth; /* in octets. This isn't necessarily the width
 		       of actual bus cycles -- it's the repeat interval
 		      in bytes, before you are talking to the first chip again.
-- 
cgit v1.2.3


From 529688fed64a7759323cbd170754c61aad0dd48b Mon Sep 17 00:00:00 2001
From: Jonas Gorski <jonas.gorski@gmail.com>
Date: Mon, 5 Dec 2011 16:08:09 +0100
Subject: mtd: maps: physmap: allow partition parsers for physmap_flash_data

Arch setup code might want to use their own partition parsers, but still
use the generic physmap flash driver.

Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
Acked-by: Florian Fainelli <florian@openwrt.org>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/physmap.c  | 5 ++++-
 include/linux/mtd/physmap.h | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index 66e8200079c2..1f749d58ae6f 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c
@@ -85,6 +85,7 @@ static int physmap_flash_probe(struct platform_device *dev)
 	struct physmap_flash_data *physmap_data;
 	struct physmap_flash_info *info;
 	const char **probe_type;
+	const char **part_types;
 	int err = 0;
 	int i;
 	int devices_found = 0;
@@ -171,7 +172,9 @@ static int physmap_flash_probe(struct platform_device *dev)
 	if (err)
 		goto err_out;
 
-	mtd_device_parse_register(info->cmtd, part_probe_types, 0,
+	part_types = physmap_data->part_probe_types ? : part_probe_types;
+
+	mtd_device_parse_register(info->cmtd, part_types, 0,
 				  physmap_data->parts, physmap_data->nr_parts);
 	return 0;
 
diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h
index 04e018160e2b..d2887e76b7f6 100644
--- a/include/linux/mtd/physmap.h
+++ b/include/linux/mtd/physmap.h
@@ -30,6 +30,7 @@ struct physmap_flash_data {
 	unsigned int		pfow_base;
 	char                    *probe_type;
 	struct mtd_partition	*parts;
+	const char		**part_probe_types;
 };
 
 #endif /* __LINUX_MTD_PHYSMAP__ */
-- 
cgit v1.2.3


From 4a42243886b87cd28a39b192161767c2af851a55 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 18:28:01 +0200
Subject: mtd: map.h: fix arm cross-build failure

This patch fixes the following build failure:
In file included from include/linux/mtd/qinfo.h:4:0,
                 from include/linux/mtd/pfow.h:7,
                 from drivers/mtd/lpddr/lpddr_cmds.c:27:
include/linux/mtd/map.h: In function 'inline_map_read':
include/linux/mtd/map.h:409:3: error: implicit declaration of function 'BUILD_BUG_ON' [-Werror=implicit-function-declaration]

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/map.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 1132410f14c6..94e924e2ecd5 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -26,7 +26,7 @@
 #include <linux/list.h>
 #include <linux/string.h>
 #include <linux/bug.h>
-
+#include <linux/kernel.h>
 
 #include <asm/unaligned.h>
 #include <asm/system.h>
-- 
cgit v1.2.3


From 7e1f0dc0551b99acb5e8fa161a7ac401994d57d8 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 15:25:39 +0200
Subject: mtd: introduce mtd_erase interface

This patch is part of a patch-set which changes the MTD interface
from 'mtd->func()' form to 'mtd_func()' form. We need this because
we want to add common code to all drivers in the mtd core level,
which is impossible with the current interface when MTD clients
call driver functions like 'read()' or 'write()' directly.

At this point we just introduce a new inline wrapper function, but
later some of them are expected to gain more code. E.g., the input
parameters check should be moved to the wrappers rather than be
duplicated at many drivers.

This particular patch introduces the 'mtd_erase()' interface. The
following patches add all the other interfaces one by one.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/ftl.c                   |  2 +-
 drivers/mtd/inftlmount.c            |  4 ++--
 drivers/mtd/mtdblock.c              |  2 +-
 drivers/mtd/mtdchar.c               |  2 +-
 drivers/mtd/mtdconcat.c             |  2 +-
 drivers/mtd/mtdoops.c               |  2 +-
 drivers/mtd/mtdpart.c               |  2 +-
 drivers/mtd/mtdswap.c               |  2 +-
 drivers/mtd/nftlmount.c             |  2 +-
 drivers/mtd/rfd_ftl.c               |  2 +-
 drivers/mtd/sm_ftl.c                |  2 +-
 drivers/mtd/tests/mtd_oobtest.c     |  2 +-
 drivers/mtd/tests/mtd_pagetest.c    |  2 +-
 drivers/mtd/tests/mtd_speedtest.c   |  4 ++--
 drivers/mtd/tests/mtd_stresstest.c  |  2 +-
 drivers/mtd/tests/mtd_subpagetest.c |  2 +-
 drivers/mtd/tests/mtd_torturetest.c |  2 +-
 drivers/mtd/ubi/io.c                |  2 +-
 drivers/staging/spectra/lld_mtd.c   |  2 +-
 fs/jffs2/erase.c                    |  2 +-
 fs/logfs/dev_mtd.c                  |  2 +-
 include/linux/mtd/mtd.h             | 19 ++++++++++++++-----
 22 files changed, 37 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index c7382bb686c6..a982889277c8 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -355,7 +355,7 @@ static int erase_xfer(partition_t *part,
     erase->len = 1 << part->header.EraseUnitSize;
     erase->priv = (u_long)part;
 
-    ret = part->mbd.mtd->erase(part->mbd.mtd, erase);
+    ret = mtd_erase(part->mbd.mtd, erase);
 
     if (!ret)
 	    xfer->EraseCount++;
diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c
index 2ff601f816ce..0d946f10a682 100644
--- a/drivers/mtd/inftlmount.c
+++ b/drivers/mtd/inftlmount.c
@@ -220,7 +220,7 @@ static int find_boot_record(struct INFTLrecord *inftl)
 				 */
 				instr->addr = ip->Reserved0 * inftl->EraseSize;
 				instr->len = inftl->EraseSize;
-				mtd->erase(mtd, instr);
+				mtd_erase(mtd, instr);
 			}
 			if ((ip->lastUnit - ip->firstUnit + 1) < ip->virtualUnits) {
 				printk(KERN_WARNING "INFTL: Media Header "
@@ -393,7 +393,7 @@ int INFTL_formatblock(struct INFTLrecord *inftl, int block)
 	   mark only the failed block in the bbt. */
 	for (physblock = 0; physblock < inftl->EraseSize;
 	     physblock += instr->len, instr->addr += instr->len) {
-		mtd->erase(inftl->mbd.mtd, instr);
+		mtd_erase(inftl->mbd.mtd, instr);
 
 		if (instr->state == MTD_ERASE_FAILED) {
 			printk(KERN_WARNING "INFTL: error while formatting block %d\n",
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index 7c1dc908a174..9b01cb0266e4 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -85,7 +85,7 @@ static int erase_write (struct mtd_info *mtd, unsigned long pos,
 	set_current_state(TASK_INTERRUPTIBLE);
 	add_wait_queue(&wait_q, &wait);
 
-	ret = mtd->erase(mtd, &erase);
+	ret = mtd_erase(mtd, &erase);
 	if (ret) {
 		set_current_state(TASK_RUNNING);
 		remove_wait_queue(&wait_q, &wait);
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 00423cc85807..41d64ff4c252 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -731,7 +731,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 			  wq_head is no longer there when the
 			  callback routine tries to wake us up.
 			*/
-			ret = mtd->erase(mtd, erase);
+			ret = mtd_erase(mtd, erase);
 			if (!ret) {
 				set_current_state(TASK_UNINTERRUPTIBLE);
 				add_wait_queue(&waitq, &wait);
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 6df4d4d4eb92..76123bd49314 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -379,7 +379,7 @@ static int concat_dev_erase(struct mtd_info *mtd, struct erase_info *erase)
 	 * FIXME: Allow INTERRUPTIBLE. Which means
 	 * not having the wait_queue head on the stack.
 	 */
-	err = mtd->erase(mtd, erase);
+	err = mtd_erase(mtd, erase);
 	if (!err) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		add_wait_queue(&waitq, &wait);
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index f3cdce9a85a6..9b2d86323169 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -112,7 +112,7 @@ static int mtdoops_erase_block(struct mtdoops_context *cxt, int offset)
 	set_current_state(TASK_INTERRUPTIBLE);
 	add_wait_queue(&wait_q, &wait);
 
-	ret = mtd->erase(mtd, &erase);
+	ret = mtd_erase(mtd, &erase);
 	if (ret) {
 		set_current_state(TASK_RUNNING);
 		remove_wait_queue(&wait_q, &wait);
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index a0bd2de4752b..d318fee28595 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -257,7 +257,7 @@ static int part_erase(struct mtd_info *mtd, struct erase_info *instr)
 	if (instr->addr >= mtd->size)
 		return -EINVAL;
 	instr->addr += part->offset;
-	ret = part->master->erase(part->master, instr);
+	ret = mtd_erase(part->master, instr);
 	if (ret) {
 		if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN)
 			instr->fail_addr -= part->offset;
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index bd9590c723e4..4e12875a916c 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -567,7 +567,7 @@ retry:
 	erase.len	= mtd->erasesize;
 	erase.priv	= (u_long)&wq;
 
-	ret = mtd->erase(mtd, &erase);
+	ret = mtd_erase(mtd, &erase);
 	if (ret) {
 		if (retries++ < MTDSWAP_ERASE_RETRIES) {
 			dev_warn(d->dev,
diff --git a/drivers/mtd/nftlmount.c b/drivers/mtd/nftlmount.c
index ac4092591aea..9164a56fb5c0 100644
--- a/drivers/mtd/nftlmount.c
+++ b/drivers/mtd/nftlmount.c
@@ -326,7 +326,7 @@ int NFTL_formatblock(struct NFTLrecord *nftl, int block)
 	instr->mtd = nftl->mbd.mtd;
 	instr->addr = block * nftl->EraseSize;
 	instr->len = nftl->EraseSize;
-	mtd->erase(mtd, instr);
+	mtd_erase(mtd, instr);
 
 	if (instr->state == MTD_ERASE_FAILED) {
 		printk("Error while formatting block %d\n", block);
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index 73ae217a4252..39de8727a524 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -342,7 +342,7 @@ static int erase_block(struct partition *part, int block)
 	part->blocks[block].state = BLOCK_ERASING;
 	part->blocks[block].free_sectors = 0;
 
-	rc = part->mbd.mtd->erase(part->mbd.mtd, erase);
+	rc = mtd_erase(part->mbd.mtd, erase);
 
 	if (rc) {
 		printk(KERN_ERR PREFIX "erase of region %llx,%llx on '%s' "
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index 1c9f307ae0a1..2f1acb1ab5e8 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -479,7 +479,7 @@ static int sm_erase_block(struct sm_ftl *ftl, int zone_num, uint16_t block,
 		return -EIO;
 	}
 
-	if (mtd->erase(mtd, &erase)) {
+	if (mtd_erase(mtd, &erase)) {
 		sm_printk("erase of block %d in zone %d failed",
 							block, zone_num);
 		goto error;
diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c
index 933f7e5f32d3..7d52854c16dd 100644
--- a/drivers/mtd/tests/mtd_oobtest.c
+++ b/drivers/mtd/tests/mtd_oobtest.c
@@ -78,7 +78,7 @@ static int erase_eraseblock(int ebnum)
 	ei.addr = addr;
 	ei.len  = mtd->erasesize;
 
-	err = mtd->erase(mtd, &ei);
+	err = mtd_erase(mtd, &ei);
 	if (err) {
 		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
 		return err;
diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c
index afafb6935fd0..271819fabb55 100644
--- a/drivers/mtd/tests/mtd_pagetest.c
+++ b/drivers/mtd/tests/mtd_pagetest.c
@@ -77,7 +77,7 @@ static int erase_eraseblock(int ebnum)
 	ei.addr = addr;
 	ei.len  = mtd->erasesize;
 
-	err = mtd->erase(mtd, &ei);
+	err = mtd_erase(mtd, &ei);
 	if (err) {
 		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
 		return err;
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
index 493b367bdd35..f67a65e21043 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/mtd_speedtest.c
@@ -79,7 +79,7 @@ static int erase_eraseblock(int ebnum)
 	ei.addr = addr;
 	ei.len  = mtd->erasesize;
 
-	err = mtd->erase(mtd, &ei);
+	err = mtd_erase(mtd, &ei);
 	if (err) {
 		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
 		return err;
@@ -105,7 +105,7 @@ static int multiblock_erase(int ebnum, int blocks)
 	ei.addr = addr;
 	ei.len  = mtd->erasesize * blocks;
 
-	err = mtd->erase(mtd, &ei);
+	err = mtd_erase(mtd, &ei);
 	if (err) {
 		printk(PRINT_PREF "error %d while erasing EB %d, blocks %d\n",
 		       err, ebnum, blocks);
diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c
index 811642fea6b4..a204a9f90524 100644
--- a/drivers/mtd/tests/mtd_stresstest.c
+++ b/drivers/mtd/tests/mtd_stresstest.c
@@ -112,7 +112,7 @@ static int erase_eraseblock(int ebnum)
 	ei.addr = addr;
 	ei.len  = mtd->erasesize;
 
-	err = mtd->erase(mtd, &ei);
+	err = mtd_erase(mtd, &ei);
 	if (unlikely(err)) {
 		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
 		return err;
diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c
index 1a05bfac4eee..16d0c05024d7 100644
--- a/drivers/mtd/tests/mtd_subpagetest.c
+++ b/drivers/mtd/tests/mtd_subpagetest.c
@@ -80,7 +80,7 @@ static int erase_eraseblock(int ebnum)
 	ei.addr = addr;
 	ei.len  = mtd->erasesize;
 
-	err = mtd->erase(mtd, &ei);
+	err = mtd_erase(mtd, &ei);
 	if (err) {
 		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
 		return err;
diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c
index 03ab649a6964..102c79b7ac66 100644
--- a/drivers/mtd/tests/mtd_torturetest.c
+++ b/drivers/mtd/tests/mtd_torturetest.c
@@ -105,7 +105,7 @@ static inline int erase_eraseblock(int ebnum)
 	ei.addr = addr;
 	ei.len  = mtd->erasesize;
 
-	err = mtd->erase(mtd, &ei);
+	err = mtd_erase(mtd, &ei);
 	if (err) {
 		printk(PRINT_PREF "error %d while erasing EB %d\n", err, ebnum);
 		return err;
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index f20b6f22f240..b6c8959e6c7e 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -361,7 +361,7 @@ retry:
 	ei.callback = erase_callback;
 	ei.priv     = (unsigned long)&wq;
 
-	err = ubi->mtd->erase(ubi->mtd, &ei);
+	err = mtd_erase(ubi->mtd, &ei);
 	if (err) {
 		if (retries++ < UBI_IO_RETRIES) {
 			dbg_io("error %d while erasing PEB %d, retry",
diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c
index a9c309a167c2..d638fafab649 100644
--- a/drivers/staging/spectra/lld_mtd.c
+++ b/drivers/staging/spectra/lld_mtd.c
@@ -188,7 +188,7 @@ u16 mtd_Erase_Block(u32 block_add)
 	erase.len = spectra_mtd->erasesize;
 	erase.priv = (unsigned long)&comp;
 
-	ret = spectra_mtd->erase(spectra_mtd, &erase);
+	ret = mtd_erase(spectra_mtd, &erase);
 	if (!ret) {
 		wait_for_completion(&comp);
 		if (erase.state != MTD_ERASE_DONE)
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index e513f1913c15..540e8eca1b49 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -74,7 +74,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
 	((struct erase_priv_struct *)instr->priv)->jeb = jeb;
 	((struct erase_priv_struct *)instr->priv)->c = c;
 
-	ret = c->mtd->erase(c->mtd, instr);
+	ret = mtd_erase(c->mtd, instr);
 	if (!ret)
 		return;
 
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index eb423ebcf538..046362894352 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -105,7 +105,7 @@ static int logfs_mtd_erase(struct super_block *sb, loff_t ofs, size_t len,
 	ei.len = len;
 	ei.callback = logfs_erase_callback;
 	ei.priv = (long)&complete;
-	ret = mtd->erase(mtd, &ei);
+	ret = mtd_erase(mtd, &ei);
 	if (ret)
 		return -EIO;
 
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 9f5b312af783..201bad557047 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -171,11 +171,8 @@ struct mtd_info {
 	struct mtd_erase_region_info *eraseregions;
 
 	/*
-	 * Erase is an asynchronous operation.  Device drivers are supposed
-	 * to call instr->callback() whenever the operation completes, even
-	 * if it completes with a failure.
-	 * Callers are supposed to pass a callback function and wait for it
-	 * to be called before writing to the block.
+	 * Do not call via these pointers, use corresponding mtd_*()
+	 * wrappers instead.
 	 */
 	int (*erase) (struct mtd_info *mtd, struct erase_info *instr);
 
@@ -274,6 +271,18 @@ struct mtd_info {
 	void (*put_device) (struct mtd_info *mtd);
 };
 
+/*
+ * mtd_erase - forward an erase request to mtd->erase() and return its result.
+ * Erase is asynchronous: device drivers are supposed to call
+ * instr->callback() whenever the operation completes, even on failure.
+ * Callers are supposed to pass a callback function and wait for it
+ * to be called before writing to the block.
+ */
+static inline int mtd_erase(struct mtd_info *mtd, struct erase_info *instr)
+{
+	return mtd->erase(mtd, instr);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From d35ea200c0fb5315f16fb2599a4bafd9c1a7b386 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 17:00:37 +0200
Subject: mtd: introduce mtd_point interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdpart.c   |  4 ++--
 fs/jffs2/erase.c        |  4 ++--
 fs/jffs2/readinode.c    |  4 ++--
 fs/jffs2/scan.c         |  4 ++--
 include/linux/mtd/mtd.h | 14 ++++++++++----
 5 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index d318fee28595..5b664722e5b0 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -89,8 +89,8 @@ static int part_point(struct mtd_info *mtd, loff_t from, size_t len,
 		len = 0;
 	else if (from + len > mtd->size)
 		len = mtd->size - from;
-	return part->master->point (part->master, from + part->offset,
-				    len, retlen, virt, phys);
+	return mtd_point(part->master, from + part->offset, len, retlen,
+			 virt, phys);
 }
 
 static void part_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 540e8eca1b49..53f8794fda6a 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -340,8 +340,8 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
 	if (c->mtd->point) {
 		unsigned long *wordebuf;
 
-		ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size,
-				    &retlen, &ebuf, NULL);
+		ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen,
+				&ebuf, NULL);
 		if (ret) {
 			D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
 			goto do_flash_read;
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index ee57bac1ba6d..dde61effeda2 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -63,8 +63,8 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
 	/* TODO: instead, incapsulate point() stuff to jffs2_flash_read(),
 	 * adding and jffs2_flash_read_end() interface. */
 	if (c->mtd->point) {
-		err = c->mtd->point(c->mtd, ofs, len, &retlen,
-				    (void **)&buffer, NULL);
+		err = mtd_point(c->mtd, ofs, len, &retlen, (void **)&buffer,
+				NULL);
 		if (!err && retlen < len) {
 			JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
 			c->mtd->unpoint(c->mtd, ofs, retlen);
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 28107ca136e4..53e05c8e5b69 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -97,8 +97,8 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
 	size_t pointlen, try_size;
 
 	if (c->mtd->point) {
-		ret = c->mtd->point(c->mtd, 0, c->mtd->size, &pointlen,
-				    (void **)&flashbuf, NULL);
+		ret = mtd_point(c->mtd, 0, c->mtd->size, &pointlen,
+				(void **)&flashbuf, NULL);
 		if (!ret && pointlen < c->mtd->size) {
 			/* Don't muck about if it won't let us point to the whole flash */
 			D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen));
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 201bad557047..ca7bfdaf7a6f 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -175,11 +175,8 @@ struct mtd_info {
 	 * wrappers instead.
 	 */
 	int (*erase) (struct mtd_info *mtd, struct erase_info *instr);
-
-	/* This stuff for eXecute-In-Place */
-	/* phys is optional and may be set to NULL */
 	int (*point) (struct mtd_info *mtd, loff_t from, size_t len,
-			size_t *retlen, void **virt, resource_size_t *phys);
+		      size_t *retlen, void **virt, resource_size_t *phys);
 
 	/* We probably shouldn't allow XIP if the unpoint isn't a NULL */
 	void (*unpoint) (struct mtd_info *mtd, loff_t from, size_t len);
@@ -283,6 +280,15 @@ static inline int mtd_erase(struct mtd_info *mtd, struct erase_info *instr)
 	return mtd->erase(mtd, instr);
 }
 
+/*
+ * XIP support: calls mtd->point() unchecked; phys is optional and may be NULL.
+ */
+static inline int mtd_point(struct mtd_info *mtd, loff_t from, size_t len,
+			    size_t *retlen, void **virt, resource_size_t *phys)
+{
+	return mtd->point(mtd, from, len, retlen, virt, phys);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 7219778ad9c18cc2c05c7fca0abe026afbc19dfb Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 17:05:52 +0200
Subject: mtd: introduce mtd_unpoint interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdpart.c   | 2 +-
 fs/jffs2/erase.c        | 4 ++--
 fs/jffs2/readinode.c    | 6 +++---
 fs/jffs2/scan.c         | 4 ++--
 include/linux/mtd/mtd.h | 8 ++++++--
 5 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 5b664722e5b0..b09624a5497c 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -97,7 +97,7 @@ static void part_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
 {
 	struct mtd_part *part = PART(mtd);
 
-	part->master->unpoint(part->master, from + part->offset, len);
+	mtd_unpoint(part->master, from + part->offset, len);
 }
 
 static unsigned long part_get_unmapped_area(struct mtd_info *mtd,
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 53f8794fda6a..ffdf4fca9c54 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -349,7 +349,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
 		if (retlen < c->sector_size) {
 			/* Don't muck about if it won't let us point to the whole erase sector */
 			D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen));
-			c->mtd->unpoint(c->mtd, jeb->offset, retlen);
+			mtd_unpoint(c->mtd, jeb->offset, retlen);
 			goto do_flash_read;
 		}
 		wordebuf = ebuf-sizeof(*wordebuf);
@@ -358,7 +358,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
 		   if (*++wordebuf != ~0)
 			   break;
 		} while(--retlen);
-		c->mtd->unpoint(c->mtd, jeb->offset, c->sector_size);
+		mtd_unpoint(c->mtd, jeb->offset, c->sector_size);
 		if (retlen) {
 			printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n",
 			       *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf));
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index dde61effeda2..fca2f84e1add 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -67,7 +67,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
 				NULL);
 		if (!err && retlen < len) {
 			JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
-			c->mtd->unpoint(c->mtd, ofs, retlen);
+			mtd_unpoint(c->mtd, ofs, retlen);
 		} else if (err)
 			JFFS2_WARNING("MTD point failed: error code %d.\n", err);
 		else
@@ -101,7 +101,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
 		kfree(buffer);
 #ifndef __ECOS
 	else
-		c->mtd->unpoint(c->mtd, ofs, len);
+		mtd_unpoint(c->mtd, ofs, len);
 #endif
 
 	if (crc != tn->data_crc) {
@@ -137,7 +137,7 @@ free_out:
 		kfree(buffer);
 #ifndef __ECOS
 	else
-		c->mtd->unpoint(c->mtd, ofs, len);
+		mtd_unpoint(c->mtd, ofs, len);
 #endif
 	return err;
 }
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 53e05c8e5b69..72f3960f44a9 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -102,7 +102,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
 		if (!ret && pointlen < c->mtd->size) {
 			/* Don't muck about if it won't let us point to the whole flash */
 			D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen));
-			c->mtd->unpoint(c->mtd, 0, pointlen);
+			mtd_unpoint(c->mtd, 0, pointlen);
 			flashbuf = NULL;
 		}
 		if (ret)
@@ -273,7 +273,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
 		kfree(flashbuf);
 #ifndef __ECOS
 	else
-		c->mtd->unpoint(c->mtd, 0, c->mtd->size);
+		mtd_unpoint(c->mtd, 0, c->mtd->size);
 #endif
 	kfree(s);
 	return ret;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index ca7bfdaf7a6f..a7d22b7fcb4c 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -177,8 +177,6 @@ struct mtd_info {
 	int (*erase) (struct mtd_info *mtd, struct erase_info *instr);
 	int (*point) (struct mtd_info *mtd, loff_t from, size_t len,
 		      size_t *retlen, void **virt, resource_size_t *phys);
-
-	/* We probably shouldn't allow XIP if the unpoint isn't a NULL */
 	void (*unpoint) (struct mtd_info *mtd, loff_t from, size_t len);
 
 	/* Allow NOMMU mmap() to directly map the device (if not NULL)
@@ -289,6 +287,12 @@ static inline int mtd_point(struct mtd_info *mtd, loff_t from, size_t len,
 	return mtd->point(mtd, from, len, retlen, virt, phys);
 }
 
+/* We probably shouldn't allow XIP if the unpoint isn't a NULL */
+static inline void mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
+{
+	mtd->unpoint(mtd, from, len); /* void method: no value to return */
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 04c601bfa4cb29c968dcb66e44c799c9c01d8675 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 17:10:15 +0200
Subject: mtd: introduce mtd_get_unmapped_area interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   |  2 +-
 drivers/mtd/mtdconcat.c |  4 ++--
 drivers/mtd/mtdpart.c   |  3 +--
 fs/romfs/mmap-nommu.c   |  2 +-
 include/linux/mtd/mtd.h | 18 +++++++++++++-----
 5 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 41d64ff4c252..c51f04a00afb 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -1135,7 +1135,7 @@ static unsigned long mtdchar_get_unmapped_area(struct file *file,
 		if (offset > mtd->size - len)
 			return (unsigned long) -EINVAL;
 
-		return mtd->get_unmapped_area(mtd, len, offset, flags);
+		return mtd_get_unmapped_area(mtd, len, offset, flags);
 	}
 
 	/* can't map directly */
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 76123bd49314..b3895cf20bb2 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -726,8 +726,8 @@ static unsigned long concat_get_unmapped_area(struct mtd_info *mtd,
 			return (unsigned long) -EINVAL;
 
 		if (subdev->get_unmapped_area)
-			return subdev->get_unmapped_area(subdev, len, offset,
-							 flags);
+			return mtd_get_unmapped_area(subdev, len, offset,
+						     flags);
 
 		break;
 	}
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index b09624a5497c..55a9cb544fc1 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -108,8 +108,7 @@ static unsigned long part_get_unmapped_area(struct mtd_info *mtd,
 	struct mtd_part *part = PART(mtd);
 
 	offset += part->offset;
-	return part->master->get_unmapped_area(part->master, len, offset,
-					       flags);
+	return mtd_get_unmapped_area(part->master, len, offset, flags);
 }
 
 static int part_read_oob(struct mtd_info *mtd, loff_t from,
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c
index eed99428f104..d5168e8e7dcb 100644
--- a/fs/romfs/mmap-nommu.c
+++ b/fs/romfs/mmap-nommu.c
@@ -53,7 +53,7 @@ static unsigned long romfs_get_unmapped_area(struct file *file,
 		if (offset > mtd->size - len)
 			return (unsigned long) -EINVAL;
 
-		return mtd->get_unmapped_area(mtd, len, offset, flags);
+		return mtd_get_unmapped_area(mtd, len, offset, flags);
 	}
 
 cant_map_directly:
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index a7d22b7fcb4c..f38e8276b408 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -178,11 +178,6 @@ struct mtd_info {
 	int (*point) (struct mtd_info *mtd, loff_t from, size_t len,
 		      size_t *retlen, void **virt, resource_size_t *phys);
 	void (*unpoint) (struct mtd_info *mtd, loff_t from, size_t len);
-
-	/* Allow NOMMU mmap() to directly map the device (if not NULL)
-	 * - return the address to which the offset maps
-	 * - return -ENOSYS to indicate refusal to do the mapping
-	 */
 	unsigned long (*get_unmapped_area) (struct mtd_info *mtd,
 					    unsigned long len,
 					    unsigned long offset,
@@ -293,6 +288,19 @@ static inline void mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
 	return mtd->unpoint(mtd, from, len);
 }
 
+/*
+ * Allow NOMMU mmap() to directly map the device (if the method is not NULL)
+ * - return the address to which the offset maps
+ * - return -ENOSYS to indicate refusal to do the mapping
+ */
+static inline unsigned long mtd_get_unmapped_area(struct mtd_info *mtd,
+						  unsigned long len,
+						  unsigned long offset,
+						  unsigned long flags)
+{
+	return mtd->get_unmapped_area(mtd, len, offset, flags);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 329ad399a9b3adf52c90637b21ca029fcf7f8795 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 17:30:16 +0200
Subject: mtd: introduce mtd_read interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 arch/arm/mach-davinci/board-da850-evm.c   |  2 +-
 arch/cris/arch-v32/drivers/axisflashmap.c |  4 +--
 drivers/mtd/afs.c                         |  4 +--
 drivers/mtd/ar7part.c                     | 15 ++++++-----
 drivers/mtd/bcm63xxpart.c                 | 12 ++++-----
 drivers/mtd/ftl.c                         | 41 ++++++++++++++++---------------
 drivers/mtd/inftlcore.c                   | 19 ++++++++------
 drivers/mtd/inftlmount.c                  | 10 ++++----
 drivers/mtd/mtdblock.c                    |  8 +++---
 drivers/mtd/mtdblock_ro.c                 |  2 +-
 drivers/mtd/mtdchar.c                     |  2 +-
 drivers/mtd/mtdconcat.c                   |  2 +-
 drivers/mtd/mtdoops.c                     |  4 +--
 drivers/mtd/mtdpart.c                     |  3 +--
 drivers/mtd/mtdswap.c                     |  4 +--
 drivers/mtd/nand/diskonchip.c             |  4 +--
 drivers/mtd/nand/nand_bbt.c               |  6 ++---
 drivers/mtd/nftlcore.c                    | 17 ++++++++-----
 drivers/mtd/nftlmount.c                   |  6 ++---
 drivers/mtd/redboot.c                     |  4 +--
 drivers/mtd/rfd_ftl.c                     | 24 +++++++++---------
 drivers/mtd/ssfdc.c                       |  6 ++---
 drivers/mtd/tests/mtd_pagetest.c          | 28 ++++++++++-----------
 drivers/mtd/tests/mtd_readtest.c          |  2 +-
 drivers/mtd/tests/mtd_speedtest.c         |  8 +++---
 drivers/mtd/tests/mtd_stresstest.c        |  2 +-
 drivers/mtd/tests/mtd_subpagetest.c       |  8 +++---
 drivers/mtd/tests/mtd_torturetest.c       |  2 +-
 drivers/mtd/ubi/debug.c                   |  2 +-
 drivers/mtd/ubi/io.c                      |  6 ++---
 drivers/staging/spectra/lld_mtd.c         |  8 +++---
 fs/jffs2/erase.c                          |  2 +-
 fs/jffs2/wbuf.c                           |  9 ++++---
 fs/logfs/dev_mtd.c                        |  2 +-
 include/linux/mtd/mtd.h                   |  9 ++++++-
 35 files changed, 152 insertions(+), 135 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c
index 6659a90dbcad..8b079f9d6924 100644
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -127,7 +127,7 @@ static void da850_evm_m25p80_notify_add(struct mtd_info *mtd)
 	size_t retlen;
 
 	if (!strcmp(mtd->name, "MAC-Address")) {
-		mtd->read(mtd, 0, ETH_ALEN, &retlen, mac_addr);
+		mtd_read(mtd, 0, ETH_ALEN, &retlen, mac_addr);
 		if (retlen == ETH_ALEN)
 			pr_info("Read MAC addr from SPI Flash: %pM\n",
 				mac_addr);
diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c
index a2bde3744622..011bddbf073f 100644
--- a/arch/cris/arch-v32/drivers/axisflashmap.c
+++ b/arch/cris/arch-v32/drivers/axisflashmap.c
@@ -413,8 +413,8 @@ static int __init init_axis_flash(void)
 		} while (blockstat && ptable_sector);
 #endif
 		if (ptable_sector) {
-			main_mtd->read(main_mtd, ptable_sector, PAGESIZE,
-				&len, page);
+			mtd_read(main_mtd, ptable_sector, PAGESIZE, &len,
+				 page);
 			ptable_head = &((struct partitiontable *) page)->head;
 		}
 
diff --git a/drivers/mtd/afs.c b/drivers/mtd/afs.c
index 89a02f6f65dc..5a3942bf109c 100644
--- a/drivers/mtd/afs.c
+++ b/drivers/mtd/afs.c
@@ -75,7 +75,7 @@ afs_read_footer(struct mtd_info *mtd, u_int *img_start, u_int *iis_start,
 	size_t sz;
 	int ret;
 
-	ret = mtd->read(mtd, ptr, sizeof(fs), &sz, (u_char *) &fs);
+	ret = mtd_read(mtd, ptr, sizeof(fs), &sz, (u_char *)&fs);
 	if (ret >= 0 && sz != sizeof(fs))
 		ret = -EINVAL;
 
@@ -132,7 +132,7 @@ afs_read_iis(struct mtd_info *mtd, struct image_info_struct *iis, u_int ptr)
 	int ret, i;
 
 	memset(iis, 0, sizeof(*iis));
-	ret = mtd->read(mtd, ptr, sizeof(*iis), &sz, (u_char *) iis);
+	ret = mtd_read(mtd, ptr, sizeof(*iis), &sz, (u_char *)iis);
 	if (ret < 0)
 		goto failed;
 
diff --git a/drivers/mtd/ar7part.c b/drivers/mtd/ar7part.c
index f40ea4547554..945393129952 100644
--- a/drivers/mtd/ar7part.c
+++ b/drivers/mtd/ar7part.c
@@ -73,8 +73,8 @@ static int create_mtd_partitions(struct mtd_info *master,
 
 	do { /* Try 10 blocks starting from master->erasesize */
 		offset = pre_size;
-		master->read(master, offset,
-			     sizeof(header), &len, (uint8_t *)&header);
+		mtd_read(master, offset, sizeof(header), &len,
+			 (uint8_t *)&header);
 		if (!strncmp((char *)&header, "TIENV0.8", 8))
 			ar7_parts[1].offset = pre_size;
 		if (header.checksum == LOADER_MAGIC1)
@@ -95,16 +95,16 @@ static int create_mtd_partitions(struct mtd_info *master,
 	case LOADER_MAGIC1:
 		while (header.length) {
 			offset += sizeof(header) + header.length;
-			master->read(master, offset, sizeof(header),
-				     &len, (uint8_t *)&header);
+			mtd_read(master, offset, sizeof(header), &len,
+				 (uint8_t *)&header);
 		}
 		root_offset = offset + sizeof(header) + 4;
 		break;
 	case LOADER_MAGIC2:
 		while (header.length) {
 			offset += sizeof(header) + header.length;
-			master->read(master, offset, sizeof(header),
-				     &len, (uint8_t *)&header);
+			mtd_read(master, offset, sizeof(header), &len,
+				 (uint8_t *)&header);
 		}
 		root_offset = offset + sizeof(header) + 4 + 0xff;
 		root_offset &= ~(uint32_t)0xff;
@@ -114,8 +114,7 @@ static int create_mtd_partitions(struct mtd_info *master,
 		break;
 	}
 
-	master->read(master, root_offset,
-		sizeof(header), &len, (u8 *)&header);
+	mtd_read(master, root_offset, sizeof(header), &len, (u8 *)&header);
 	if (header.checksum != SQUASHFS_MAGIC) {
 		root_offset += master->erasesize - 1;
 		root_offset &= ~(master->erasesize - 1);
diff --git a/drivers/mtd/bcm63xxpart.c b/drivers/mtd/bcm63xxpart.c
index 9ee8bc426e93..608321ee056e 100644
--- a/drivers/mtd/bcm63xxpart.c
+++ b/drivers/mtd/bcm63xxpart.c
@@ -48,8 +48,8 @@ static int bcm63xx_detect_cfe(struct mtd_info *master)
 	int ret;
 	size_t retlen;
 
-	ret = master->read(master, BCM963XX_CFE_VERSION_OFFSET, 5, &retlen,
-			   (void *)buf);
+	ret = mtd_read(master, BCM963XX_CFE_VERSION_OFFSET, 5, &retlen,
+		       (void *)buf);
 	buf[retlen] = 0;
 
 	if (ret)
@@ -59,8 +59,8 @@ static int bcm63xx_detect_cfe(struct mtd_info *master)
 		return 0;
 
 	/* very old CFE's do not have the cfe-v string, so check for magic */
-	ret = master->read(master, BCM63XX_CFE_MAGIC_OFFSET, 8, &retlen,
-			   (void *)buf);
+	ret = mtd_read(master, BCM63XX_CFE_MAGIC_OFFSET, 8, &retlen,
+		       (void *)buf);
 	buf[retlen] = 0;
 
 	return strncmp("CFE1CFE1", buf, 8);
@@ -95,8 +95,8 @@ static int bcm63xx_parse_cfe_partitions(struct mtd_info *master,
 		return -ENOMEM;
 
 	/* Get the tag */
-	ret = master->read(master, cfelen, sizeof(struct bcm_tag), &retlen,
-			   (void *)buf);
+	ret = mtd_read(master, cfelen, sizeof(struct bcm_tag), &retlen,
+		       (void *)buf);
 
 	if (retlen != sizeof(struct bcm_tag)) {
 		vfree(buf);
diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index a982889277c8..12fd7ebd3fd8 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -168,8 +168,8 @@ static int scan_header(partition_t *part)
 	 (offset + sizeof(header)) < max_offset;
 	 offset += part->mbd.mtd->erasesize ? : 0x2000) {
 
-	err = part->mbd.mtd->read(part->mbd.mtd, offset, sizeof(header), &ret,
-			      (unsigned char *)&header);
+	err = mtd_read(part->mbd.mtd, offset, sizeof(header), &ret,
+                       (unsigned char *)&header);
 
 	if (err)
 	    return err;
@@ -224,8 +224,8 @@ static int build_maps(partition_t *part)
     for (i = 0; i < le16_to_cpu(part->header.NumEraseUnits); i++) {
 	offset = ((i + le16_to_cpu(part->header.FirstPhysicalEUN))
 		      << part->header.EraseUnitSize);
-	ret = part->mbd.mtd->read(part->mbd.mtd, offset, sizeof(header), &retval,
-			      (unsigned char *)&header);
+	ret = mtd_read(part->mbd.mtd, offset, sizeof(header), &retval,
+                       (unsigned char *)&header);
 
 	if (ret)
 	    goto out_XferInfo;
@@ -289,9 +289,9 @@ static int build_maps(partition_t *part)
 	part->EUNInfo[i].Deleted = 0;
 	offset = part->EUNInfo[i].Offset + le32_to_cpu(header.BAMOffset);
 
-	ret = part->mbd.mtd->read(part->mbd.mtd, offset,
-			      part->BlocksPerUnit * sizeof(uint32_t), &retval,
-			      (unsigned char *)part->bam_cache);
+	ret = mtd_read(part->mbd.mtd, offset,
+                       part->BlocksPerUnit * sizeof(uint32_t), &retval,
+                       (unsigned char *)part->bam_cache);
 
 	if (ret)
 		goto out_bam_cache;
@@ -485,9 +485,9 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit,
 
 	offset = eun->Offset + le32_to_cpu(part->header.BAMOffset);
 
-	ret = part->mbd.mtd->read(part->mbd.mtd, offset,
-			      part->BlocksPerUnit * sizeof(uint32_t),
-			      &retlen, (u_char *) (part->bam_cache));
+	ret = mtd_read(part->mbd.mtd, offset,
+                       part->BlocksPerUnit * sizeof(uint32_t), &retlen,
+                       (u_char *)(part->bam_cache));
 
 	/* mark the cache bad, in case we get an error later */
 	part->bam_index = 0xffff;
@@ -523,8 +523,8 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit,
 	    break;
 	case BLOCK_DATA:
 	case BLOCK_REPLACEMENT:
-	    ret = part->mbd.mtd->read(part->mbd.mtd, src, SECTOR_SIZE,
-                        &retlen, (u_char *) buf);
+	    ret = mtd_read(part->mbd.mtd, src, SECTOR_SIZE, &retlen,
+                           (u_char *)buf);
 	    if (ret) {
 		printk(KERN_WARNING "ftl: Error reading old xfer unit in copy_erase_unit\n");
 		return ret;
@@ -747,10 +747,11 @@ static uint32_t find_free(partition_t *part)
 	/* Invalidate cache */
 	part->bam_index = 0xffff;
 
-	ret = part->mbd.mtd->read(part->mbd.mtd,
-		       part->EUNInfo[eun].Offset + le32_to_cpu(part->header.BAMOffset),
-		       part->BlocksPerUnit * sizeof(uint32_t),
-		       &retlen, (u_char *) (part->bam_cache));
+	ret = mtd_read(part->mbd.mtd,
+                       part->EUNInfo[eun].Offset + le32_to_cpu(part->header.BAMOffset),
+                       part->BlocksPerUnit * sizeof(uint32_t),
+                       &retlen,
+                       (u_char *)(part->bam_cache));
 
 	if (ret) {
 	    printk(KERN_WARNING"ftl: Error reading BAM in find_free\n");
@@ -810,8 +811,8 @@ static int ftl_read(partition_t *part, caddr_t buffer,
 	else {
 	    offset = (part->EUNInfo[log_addr / bsize].Offset
 			  + (log_addr % bsize));
-	    ret = part->mbd.mtd->read(part->mbd.mtd, offset, SECTOR_SIZE,
-			   &retlen, (u_char *) buffer);
+	    ret = mtd_read(part->mbd.mtd, offset, SECTOR_SIZE, &retlen,
+                           (u_char *)buffer);
 
 	    if (ret) {
 		printk(KERN_WARNING "Error reading MTD device in ftl_read()\n");
@@ -849,8 +850,8 @@ static int set_bam_entry(partition_t *part, uint32_t log_addr,
 		  le32_to_cpu(part->header.BAMOffset));
 
 #ifdef PSYCHO_DEBUG
-    ret = part->mbd.mtd->read(part->mbd.mtd, offset, sizeof(uint32_t),
-                        &retlen, (u_char *)&old_addr);
+    ret = mtd_read(part->mbd.mtd, offset, sizeof(uint32_t), &retlen,
+                   (u_char *)&old_addr);
     if (ret) {
 	printk(KERN_WARNING"ftl: Error reading old_addr in set_bam_entry: %d\n",ret);
 	return ret;
diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index dd034efd1875..0b038bed7b9c 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -343,14 +343,17 @@ static u16 INFTL_foldchain(struct INFTLrecord *inftl, unsigned thisVUC, unsigned
 		if (BlockMap[block] == BLOCK_NIL)
 			continue;
 
-		ret = mtd->read(mtd, (inftl->EraseSize * BlockMap[block]) +
-				(block * SECTORSIZE), SECTORSIZE, &retlen,
-				movebuf);
+		ret = mtd_read(mtd,
+			       (inftl->EraseSize * BlockMap[block]) + (block * SECTORSIZE),
+			       SECTORSIZE,
+			       &retlen,
+			       movebuf);
 		if (ret < 0 && !mtd_is_bitflip(ret)) {
-			ret = mtd->read(mtd,
-					(inftl->EraseSize * BlockMap[block]) +
-					(block * SECTORSIZE), SECTORSIZE,
-					&retlen, movebuf);
+			ret = mtd_read(mtd,
+				       (inftl->EraseSize * BlockMap[block]) + (block * SECTORSIZE),
+				       SECTORSIZE,
+				       &retlen,
+				       movebuf);
 			if (ret != -EIO)
 				pr_debug("INFTL: error went away on retry?\n");
 		}
@@ -914,7 +917,7 @@ foundit:
 	} else {
 		size_t retlen;
 		loff_t ptr = (thisEUN * inftl->EraseSize) + blockofs;
-		int ret = mtd->read(mtd, ptr, SECTORSIZE, &retlen, buffer);
+		int ret = mtd_read(mtd, ptr, SECTORSIZE, &retlen, buffer);
 
 		/* Handle corrected bit flips gracefully */
 		if (ret < 0 && !mtd_is_bitflip(ret))
diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c
index 0d946f10a682..9bfbca5d88d6 100644
--- a/drivers/mtd/inftlmount.c
+++ b/drivers/mtd/inftlmount.c
@@ -73,8 +73,8 @@ static int find_boot_record(struct INFTLrecord *inftl)
 		 * Check for BNAND header first. Then whinge if it's found
 		 * but later checks fail.
 		 */
-		ret = mtd->read(mtd, block * inftl->EraseSize,
-				SECTORSIZE, &retlen, buf);
+		ret = mtd_read(mtd, block * inftl->EraseSize, SECTORSIZE,
+			       &retlen, buf);
 		/* We ignore ret in case the ECC of the MediaHeader is invalid
 		   (which is apparently acceptable) */
 		if (retlen != SECTORSIZE) {
@@ -118,8 +118,8 @@ static int find_boot_record(struct INFTLrecord *inftl)
 		memcpy(mh, buf, sizeof(struct INFTLMediaHeader));
 
 		/* Read the spare media header at offset 4096 */
-		mtd->read(mtd, block * inftl->EraseSize + 4096,
-			  SECTORSIZE, &retlen, buf);
+		mtd_read(mtd, block * inftl->EraseSize + 4096, SECTORSIZE,
+			 &retlen, buf);
 		if (retlen != SECTORSIZE) {
 			printk(KERN_WARNING "INFTL: Unable to read spare "
 			       "Media Header\n");
@@ -342,7 +342,7 @@ static int check_free_sectors(struct INFTLrecord *inftl, unsigned int address,
 	int i;
 
 	for (i = 0; i < len; i += SECTORSIZE) {
-		if (mtd->read(mtd, address, SECTORSIZE, &retlen, buf))
+		if (mtd_read(mtd, address, SECTORSIZE, &retlen, buf))
 			return -1;
 		if (memcmpb(buf, 0xff, SECTORSIZE) != 0)
 			return -1;
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index 9b01cb0266e4..b0644d2d2a6e 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -184,8 +184,8 @@ static int do_cached_write (struct mtdblk_dev *mtdblk, unsigned long pos,
 			    mtdblk->cache_offset != sect_start) {
 				/* fill the cache with the current sector */
 				mtdblk->cache_state = STATE_EMPTY;
-				ret = mtd->read(mtd, sect_start, sect_size,
-						&retlen, mtdblk->cache_data);
+				ret = mtd_read(mtd, sect_start, sect_size,
+					       &retlen, mtdblk->cache_data);
 				if (ret)
 					return ret;
 				if (retlen != sect_size)
@@ -222,7 +222,7 @@ static int do_cached_read (struct mtdblk_dev *mtdblk, unsigned long pos,
 			mtd->name, pos, len);
 
 	if (!sect_size)
-		return mtd->read(mtd, pos, len, &retlen, buf);
+		return mtd_read(mtd, pos, len, &retlen, buf);
 
 	while (len > 0) {
 		unsigned long sect_start = (pos/sect_size)*sect_size;
@@ -241,7 +241,7 @@ static int do_cached_read (struct mtdblk_dev *mtdblk, unsigned long pos,
 		    mtdblk->cache_offset == sect_start) {
 			memcpy (buf, mtdblk->cache_data + offset, size);
 		} else {
-			ret = mtd->read(mtd, pos, size, &retlen, buf);
+			ret = mtd_read(mtd, pos, size, &retlen, buf);
 			if (ret)
 				return ret;
 			if (retlen != size)
diff --git a/drivers/mtd/mtdblock_ro.c b/drivers/mtd/mtdblock_ro.c
index 0470a6e86309..f5737b1153fa 100644
--- a/drivers/mtd/mtdblock_ro.c
+++ b/drivers/mtd/mtdblock_ro.c
@@ -30,7 +30,7 @@ static int mtdblock_readsect(struct mtd_blktrans_dev *dev,
 {
 	size_t retlen;
 
-	if (dev->mtd->read(dev->mtd, (block * 512), 512, &retlen, buf))
+	if (mtd_read(dev->mtd, (block * 512), 512, &retlen, buf))
 		return 1;
 	return 0;
 }
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index c51f04a00afb..c7f484687fa3 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -232,7 +232,7 @@ static ssize_t mtdchar_read(struct file *file, char __user *buf, size_t count,
 			break;
 		}
 		default:
-			ret = mtd->read(mtd, *ppos, len, &retlen, kbuf);
+			ret = mtd_read(mtd, *ppos, len, &retlen, kbuf);
 		}
 		/* Nand returns -EBADMSG on ECC errors, but it returns
 		 * the data. For our userspace tools it is important
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index b3895cf20bb2..45460349fd12 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -91,7 +91,7 @@ concat_read(struct mtd_info *mtd, loff_t from, size_t len,
 			/* Entire transaction goes into this subdev */
 			size = len;
 
-		err = subdev->read(subdev, from, size, &retsize, buf);
+		err = mtd_read(subdev, from, size, &retsize, buf);
 
 		/* Save information about bitflips! */
 		if (unlikely(err)) {
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 9b2d86323169..23629ad08507 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -258,8 +258,8 @@ static void find_next_position(struct mtdoops_context *cxt)
 			continue;
 		/* Assume the page is used */
 		mark_page_used(cxt, page);
-		ret = mtd->read(mtd, page * record_size, MTDOOPS_HEADER_SIZE,
-				&retlen, (u_char *) &count[0]);
+		ret = mtd_read(mtd, page * record_size, MTDOOPS_HEADER_SIZE,
+			       &retlen, (u_char *)&count[0]);
 		if (retlen != MTDOOPS_HEADER_SIZE ||
 				(ret < 0 && !mtd_is_bitflip(ret))) {
 			printk(KERN_ERR "mtdoops: read failure at %ld (%td of %d read), err %d\n",
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 55a9cb544fc1..59cd7974bc50 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -70,8 +70,7 @@ static int part_read(struct mtd_info *mtd, loff_t from, size_t len,
 		len = 0;
 	else if (from + len > mtd->size)
 		len = mtd->size - from;
-	res = part->master->read(part->master, from + part->offset,
-				   len, retlen, buf);
+	res = mtd_read(part->master, from + part->offset, len, retlen, buf);
 	if (unlikely(res)) {
 		if (mtd_is_bitflip(res))
 			mtd->ecc_stats.corrected += part->master->ecc_stats.corrected - stats.corrected;
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 4e12875a916c..b3282d2aa8f8 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -736,7 +736,7 @@ static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock,
 	retries = 0;
 
 retry:
-	ret = mtd->read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf);
+	ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf);
 
 	if (ret < 0 && !mtd_is_bitflip(ret)) {
 		oldeb = d->eb_data + oldblock / d->pages_per_eblk;
@@ -1161,7 +1161,7 @@ static int mtdswap_readsect(struct mtd_blktrans_dev *dev,
 	retries = 0;
 
 retry:
-	ret = mtd->read(mtd, readpos, PAGE_SIZE, &retlen, buf);
+	ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf);
 
 	d->mtd_read_count++;
 	if (mtd_is_bitflip(ret)) {
diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c
index 5780dbab6113..df921e7a496c 100644
--- a/drivers/mtd/nand/diskonchip.c
+++ b/drivers/mtd/nand/diskonchip.c
@@ -1072,7 +1072,7 @@ static int __init find_media_headers(struct mtd_info *mtd, u_char *buf, const ch
 	size_t retlen;
 
 	for (offs = 0; offs < mtd->size; offs += mtd->erasesize) {
-		ret = mtd->read(mtd, offs, mtd->writesize, &retlen, buf);
+		ret = mtd_read(mtd, offs, mtd->writesize, &retlen, buf);
 		if (retlen != mtd->writesize)
 			continue;
 		if (ret) {
@@ -1097,7 +1097,7 @@ static int __init find_media_headers(struct mtd_info *mtd, u_char *buf, const ch
 	/* Only one mediaheader was found.  We want buf to contain a
 	   mediaheader on return, so we'll have to re-read the one we found. */
 	offs = doc->mh0_page << this->page_shift;
-	ret = mtd->read(mtd, offs, mtd->writesize, &retlen, buf);
+	ret = mtd_read(mtd, offs, mtd->writesize, &retlen, buf);
 	if (retlen != mtd->writesize) {
 		/* Insanity.  Give up. */
 		printk(KERN_ERR "Read DiskOnChip Media Header once, but can't reread it???\n");
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 69148ae3bf58..1bcd6bc6798c 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -201,7 +201,7 @@ static int read_bbt(struct mtd_info *mtd, uint8_t *buf, int page, int num,
 			from += marker_len;
 			marker_len = 0;
 		}
-		res = mtd->read(mtd, from, len, &retlen, buf);
+		res = mtd_read(mtd, from, len, &retlen, buf);
 		if (res < 0) {
 			if (mtd_is_eccerr(res)) {
 				pr_info("nand_bbt: ECC error in BBT at "
@@ -298,7 +298,7 @@ static int scan_read_raw_data(struct mtd_info *mtd, uint8_t *buf, loff_t offs,
 	if (td->options & NAND_BBT_VERSION)
 		len++;
 
-	return mtd->read(mtd, offs, len, &retlen, buf);
+	return mtd_read(mtd, offs, len, &retlen, buf);
 }
 
 /* Scan read raw data from flash */
@@ -756,7 +756,7 @@ static int write_bbt(struct mtd_info *mtd, uint8_t *buf,
 			/* Make it block aligned */
 			to &= ~((loff_t)((1 << this->bbt_erase_shift) - 1));
 			len = 1 << this->bbt_erase_shift;
-			res = mtd->read(mtd, to, len, &retlen, buf);
+			res = mtd_read(mtd, to, len, &retlen, buf);
 			if (res < 0) {
 				if (retlen != len) {
 					pr_info("nand_bbt: error reading block "
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index cda77b562ad4..1a9d9c1d3a74 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -423,12 +423,17 @@ static u16 NFTL_foldchain (struct NFTLrecord *nftl, unsigned thisVUC, unsigned p
 		if (BlockMap[block] == BLOCK_NIL)
 			continue;
 
-		ret = mtd->read(mtd, (nftl->EraseSize * BlockMap[block]) + (block * 512),
-				512, &retlen, movebuf);
+		ret = mtd_read(mtd,
+			       (nftl->EraseSize * BlockMap[block]) + (block * 512),
+			       512,
+			       &retlen,
+			       movebuf);
 		if (ret < 0 && !mtd_is_bitflip(ret)) {
-			ret = mtd->read(mtd, (nftl->EraseSize * BlockMap[block])
-					+ (block * 512), 512, &retlen,
-					movebuf);
+			ret = mtd_read(mtd,
+				       (nftl->EraseSize * BlockMap[block]) + (block * 512),
+				       512,
+				       &retlen,
+				       movebuf);
 			if (ret != -EIO)
 				printk("Error went away on retry.\n");
 		}
@@ -771,7 +776,7 @@ static int nftl_readblock(struct mtd_blktrans_dev *mbd, unsigned long block,
 	} else {
 		loff_t ptr = (lastgoodEUN * nftl->EraseSize) + blockofs;
 		size_t retlen;
-		int res = mtd->read(mtd, ptr, 512, &retlen, buffer);
+		int res = mtd_read(mtd, ptr, 512, &retlen, buffer);
 
 		if (res < 0 && !mtd_is_bitflip(res))
 			return -EIO;
diff --git a/drivers/mtd/nftlmount.c b/drivers/mtd/nftlmount.c
index 9164a56fb5c0..b068dc8a3666 100644
--- a/drivers/mtd/nftlmount.c
+++ b/drivers/mtd/nftlmount.c
@@ -63,8 +63,8 @@ static int find_boot_record(struct NFTLrecord *nftl)
 
 		/* Check for ANAND header first. Then can whinge if it's found but later
 		   checks fail */
-		ret = mtd->read(mtd, block * nftl->EraseSize, SECTORSIZE,
-				&retlen, buf);
+		ret = mtd_read(mtd, block * nftl->EraseSize, SECTORSIZE,
+			       &retlen, buf);
 		/* We ignore ret in case the ECC of the MediaHeader is invalid
 		   (which is apparently acceptable) */
 		if (retlen != SECTORSIZE) {
@@ -274,7 +274,7 @@ static int check_free_sectors(struct NFTLrecord *nftl, unsigned int address, int
 	int i;
 
 	for (i = 0; i < len; i += SECTORSIZE) {
-		if (mtd->read(mtd, address, SECTORSIZE, &retlen, buf))
+		if (mtd_read(mtd, address, SECTORSIZE, &retlen, buf))
 			return -1;
 		if (memcmpb(buf, 0xff, SECTORSIZE) != 0)
 			return -1;
diff --git a/drivers/mtd/redboot.c b/drivers/mtd/redboot.c
index e366b1d84ead..623d9b86d0d9 100644
--- a/drivers/mtd/redboot.c
+++ b/drivers/mtd/redboot.c
@@ -104,8 +104,8 @@ static int parse_redboot_partitions(struct mtd_info *master,
 	printk(KERN_NOTICE "Searching for RedBoot partition table in %s at offset 0x%lx\n",
 	       master->name, offset);
 
-	ret = master->read(master, offset,
-			   master->erasesize, &retlen, (void *)buf);
+	ret = mtd_read(master, offset, master->erasesize, &retlen,
+		       (void *)buf);
 
 	if (ret)
 		goto out;
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index 39de8727a524..d9fe2d0533d9 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -200,9 +200,9 @@ static int scan_header(struct partition *part)
 		part->sector_map[i] = -1;
 
 	for (i=0, blocks_found=0; i<part->total_blocks; i++) {
-		rc = part->mbd.mtd->read(part->mbd.mtd,
-				i * part->block_size, part->header_size,
-				&retlen, (u_char*)part->header_cache);
+		rc = mtd_read(part->mbd.mtd, i * part->block_size,
+			      part->header_size, &retlen,
+			      (u_char *)part->header_cache);
 
 		if (!rc && retlen != part->header_size)
 			rc = -EIO;
@@ -250,8 +250,8 @@ static int rfd_ftl_readsect(struct mtd_blktrans_dev *dev, u_long sector, char *b
 
 	addr = part->sector_map[sector];
 	if (addr != -1) {
-		rc = part->mbd.mtd->read(part->mbd.mtd, addr, SECTOR_SIZE,
-						&retlen, (u_char*)buf);
+		rc = mtd_read(part->mbd.mtd, addr, SECTOR_SIZE, &retlen,
+			      (u_char *)buf);
 		if (!rc && retlen != SECTOR_SIZE)
 			rc = -EIO;
 
@@ -372,9 +372,8 @@ static int move_block_contents(struct partition *part, int block_no, u_long *old
 	if (!map)
 		goto err2;
 
-	rc = part->mbd.mtd->read(part->mbd.mtd,
-		part->blocks[block_no].offset, part->header_size,
-		&retlen, (u_char*)map);
+	rc = mtd_read(part->mbd.mtd, part->blocks[block_no].offset,
+		      part->header_size, &retlen, (u_char *)map);
 
 	if (!rc && retlen != part->header_size)
 		rc = -EIO;
@@ -413,8 +412,8 @@ static int move_block_contents(struct partition *part, int block_no, u_long *old
 			}
 			continue;
 		}
-		rc = part->mbd.mtd->read(part->mbd.mtd, addr,
-			SECTOR_SIZE, &retlen, sector_data);
+		rc = mtd_read(part->mbd.mtd, addr, SECTOR_SIZE, &retlen,
+			      sector_data);
 
 		if (!rc && retlen != SECTOR_SIZE)
 			rc = -EIO;
@@ -563,8 +562,9 @@ static int find_writable_block(struct partition *part, u_long *old_sector)
 		}
 	}
 
-	rc = part->mbd.mtd->read(part->mbd.mtd, part->blocks[block].offset,
-		part->header_size, &retlen, (u_char*)part->header_cache);
+	rc = mtd_read(part->mbd.mtd, part->blocks[block].offset,
+		      part->header_size, &retlen,
+		      (u_char *)part->header_cache);
 
 	if (!rc && retlen != part->header_size)
 		rc = -EIO;
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index 976e3d28b962..293e22a5710f 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -123,8 +123,8 @@ static int get_valid_cis_sector(struct mtd_info *mtd)
 	 */
 	for (k = 0, offset = 0; k < 4; k++, offset += mtd->erasesize) {
 		if (!mtd->block_isbad(mtd, offset)) {
-			ret = mtd->read(mtd, offset, SECTOR_SIZE, &retlen,
-				sect_buf);
+			ret = mtd_read(mtd, offset, SECTOR_SIZE, &retlen,
+				       sect_buf);
 
 			/* CIS pattern match on the sector buffer */
 			if (ret < 0 || retlen != SECTOR_SIZE) {
@@ -156,7 +156,7 @@ static int read_physical_sector(struct mtd_info *mtd, uint8_t *sect_buf,
 	size_t retlen;
 	loff_t offset = (loff_t)sect_no << SECTOR_SHIFT;
 
-	ret = mtd->read(mtd, offset, SECTOR_SIZE, &retlen, sect_buf);
+	ret = mtd_read(mtd, offset, SECTOR_SIZE, &retlen, sect_buf);
 	if (ret < 0 || retlen != SECTOR_SIZE)
 		return -1;
 
diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c
index 271819fabb55..6d62e24a03ed 100644
--- a/drivers/mtd/tests/mtd_pagetest.c
+++ b/drivers/mtd/tests/mtd_pagetest.c
@@ -127,7 +127,7 @@ static int verify_eraseblock(int ebnum)
 	set_random_data(writebuf, mtd->erasesize);
 	for (j = 0; j < pgcnt - 1; ++j, addr += pgsize) {
 		/* Do a read to set the internal dataRAMs to different data */
-		err = mtd->read(mtd, addr0, bufsize, &read, twopages);
+		err = mtd_read(mtd, addr0, bufsize, &read, twopages);
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
@@ -135,7 +135,7 @@ static int verify_eraseblock(int ebnum)
 			       (long long)addr0);
 			return err;
 		}
-		err = mtd->read(mtd, addrn - bufsize, bufsize, &read, twopages);
+		err = mtd_read(mtd, addrn - bufsize, bufsize, &read, twopages);
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
@@ -145,7 +145,7 @@ static int verify_eraseblock(int ebnum)
 		}
 		memset(twopages, 0, bufsize);
 		read = 0;
-		err = mtd->read(mtd, addr, bufsize, &read, twopages);
+		err = mtd_read(mtd, addr, bufsize, &read, twopages);
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
@@ -163,7 +163,7 @@ static int verify_eraseblock(int ebnum)
 	if (addr <= addrn - pgsize - pgsize && !bbt[ebnum + 1]) {
 		unsigned long oldnext = next;
 		/* Do a read to set the internal dataRAMs to different data */
-		err = mtd->read(mtd, addr0, bufsize, &read, twopages);
+		err = mtd_read(mtd, addr0, bufsize, &read, twopages);
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
@@ -171,7 +171,7 @@ static int verify_eraseblock(int ebnum)
 			       (long long)addr0);
 			return err;
 		}
-		err = mtd->read(mtd, addrn - bufsize, bufsize, &read, twopages);
+		err = mtd_read(mtd, addrn - bufsize, bufsize, &read, twopages);
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
@@ -181,7 +181,7 @@ static int verify_eraseblock(int ebnum)
 		}
 		memset(twopages, 0, bufsize);
 		read = 0;
-		err = mtd->read(mtd, addr, bufsize, &read, twopages);
+		err = mtd_read(mtd, addr, bufsize, &read, twopages);
 		if (mtd_is_bitflip(err))
 			err = 0;
 		if (err || read != bufsize) {
@@ -230,7 +230,7 @@ static int crosstest(void)
 	/* Read 2nd-to-last page to pp1 */
 	read = 0;
 	addr = addrn - pgsize - pgsize;
-	err = mtd->read(mtd, addr, pgsize, &read, pp1);
+	err = mtd_read(mtd, addr, pgsize, &read, pp1);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
@@ -243,7 +243,7 @@ static int crosstest(void)
 	/* Read 3rd-to-last page to pp1 */
 	read = 0;
 	addr = addrn - pgsize - pgsize - pgsize;
-	err = mtd->read(mtd, addr, pgsize, &read, pp1);
+	err = mtd_read(mtd, addr, pgsize, &read, pp1);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
@@ -257,7 +257,7 @@ static int crosstest(void)
 	read = 0;
 	addr = addr0;
 	printk(PRINT_PREF "reading page at %#llx\n", (long long)addr);
-	err = mtd->read(mtd, addr, pgsize, &read, pp2);
+	err = mtd_read(mtd, addr, pgsize, &read, pp2);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
@@ -271,7 +271,7 @@ static int crosstest(void)
 	read = 0;
 	addr = addrn - pgsize;
 	printk(PRINT_PREF "reading page at %#llx\n", (long long)addr);
-	err = mtd->read(mtd, addr, pgsize, &read, pp3);
+	err = mtd_read(mtd, addr, pgsize, &read, pp3);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
@@ -285,7 +285,7 @@ static int crosstest(void)
 	read = 0;
 	addr = addr0;
 	printk(PRINT_PREF "reading page at %#llx\n", (long long)addr);
-	err = mtd->read(mtd, addr, pgsize, &read, pp4);
+	err = mtd_read(mtd, addr, pgsize, &read, pp4);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
@@ -344,7 +344,7 @@ static int erasecrosstest(void)
 
 	printk(PRINT_PREF "reading 1st page of block %d\n", ebnum);
 	memset(readbuf, 0, pgsize);
-	err = mtd->read(mtd, addr0, pgsize, &read, readbuf);
+	err = mtd_read(mtd, addr0, pgsize, &read, readbuf);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
@@ -382,7 +382,7 @@ static int erasecrosstest(void)
 
 	printk(PRINT_PREF "reading 1st page of block %d\n", ebnum);
 	memset(readbuf, 0, pgsize);
-	err = mtd->read(mtd, addr0, pgsize, &read, readbuf);
+	err = mtd_read(mtd, addr0, pgsize, &read, readbuf);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
@@ -438,7 +438,7 @@ static int erasetest(void)
 		return err;
 
 	printk(PRINT_PREF "reading 1st page of block %d\n", ebnum);
-	err = mtd->read(mtd, addr0, pgsize, &read, twopages);
+	err = mtd_read(mtd, addr0, pgsize, &read, twopages);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (err || read != pgsize) {
diff --git a/drivers/mtd/tests/mtd_readtest.c b/drivers/mtd/tests/mtd_readtest.c
index 550fe51225a7..0c58d2976c76 100644
--- a/drivers/mtd/tests/mtd_readtest.c
+++ b/drivers/mtd/tests/mtd_readtest.c
@@ -52,7 +52,7 @@ static int read_eraseblock_by_page(int ebnum)
 
 	for (i = 0; i < pgcnt; i++) {
 		memset(buf, 0 , pgcnt);
-		ret = mtd->read(mtd, addr, pgsize, &read, buf);
+		ret = mtd_read(mtd, addr, pgsize, &read, buf);
 		if (ret == -EUCLEAN)
 			ret = 0;
 		if (ret || read != pgsize) {
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
index f67a65e21043..3c9529bd0a62 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/mtd_speedtest.c
@@ -214,7 +214,7 @@ static int read_eraseblock(int ebnum)
 	int err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 
-	err = mtd->read(mtd, addr, mtd->erasesize, &read, iobuf);
+	err = mtd_read(mtd, addr, mtd->erasesize, &read, iobuf);
 	/* Ignore corrected ECC errors */
 	if (mtd_is_bitflip(err))
 		err = 0;
@@ -235,7 +235,7 @@ static int read_eraseblock_by_page(int ebnum)
 	void *buf = iobuf;
 
 	for (i = 0; i < pgcnt; i++) {
-		err = mtd->read(mtd, addr, pgsize, &read, buf);
+		err = mtd_read(mtd, addr, pgsize, &read, buf);
 		/* Ignore corrected ECC errors */
 		if (mtd_is_bitflip(err))
 			err = 0;
@@ -261,7 +261,7 @@ static int read_eraseblock_by_2pages(int ebnum)
 	void *buf = iobuf;
 
 	for (i = 0; i < n; i++) {
-		err = mtd->read(mtd, addr, sz, &read, buf);
+		err = mtd_read(mtd, addr, sz, &read, buf);
 		/* Ignore corrected ECC errors */
 		if (mtd_is_bitflip(err))
 			err = 0;
@@ -276,7 +276,7 @@ static int read_eraseblock_by_2pages(int ebnum)
 		buf += sz;
 	}
 	if (pgcnt % 2) {
-		err = mtd->read(mtd, addr, pgsize, &read, buf);
+		err = mtd_read(mtd, addr, pgsize, &read, buf);
 		/* Ignore corrected ECC errors */
 		if (mtd_is_bitflip(err))
 			err = 0;
diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c
index a204a9f90524..83a843723880 100644
--- a/drivers/mtd/tests/mtd_stresstest.c
+++ b/drivers/mtd/tests/mtd_stresstest.c
@@ -153,7 +153,7 @@ static int do_read(void)
 			len = mtd->erasesize - offs;
 	}
 	addr = eb * mtd->erasesize + offs;
-	err = mtd->read(mtd, addr, len, &read, readbuf);
+	err = mtd_read(mtd, addr, len, &read, readbuf);
 	if (mtd_is_bitflip(err))
 		err = 0;
 	if (unlikely(err || read != len)) {
diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c
index 16d0c05024d7..d81f89a19daa 100644
--- a/drivers/mtd/tests/mtd_subpagetest.c
+++ b/drivers/mtd/tests/mtd_subpagetest.c
@@ -196,7 +196,7 @@ static int verify_eraseblock(int ebnum)
 	set_random_data(writebuf, subpgsize);
 	clear_data(readbuf, subpgsize);
 	read = 0;
-	err = mtd->read(mtd, addr, subpgsize, &read, readbuf);
+	err = mtd_read(mtd, addr, subpgsize, &read, readbuf);
 	if (unlikely(err || read != subpgsize)) {
 		if (mtd_is_bitflip(err) && read == subpgsize) {
 			printk(PRINT_PREF "ECC correction at %#llx\n",
@@ -224,7 +224,7 @@ static int verify_eraseblock(int ebnum)
 	set_random_data(writebuf, subpgsize);
 	clear_data(readbuf, subpgsize);
 	read = 0;
-	err = mtd->read(mtd, addr, subpgsize, &read, readbuf);
+	err = mtd_read(mtd, addr, subpgsize, &read, readbuf);
 	if (unlikely(err || read != subpgsize)) {
 		if (mtd_is_bitflip(err) && read == subpgsize) {
 			printk(PRINT_PREF "ECC correction at %#llx\n",
@@ -262,7 +262,7 @@ static int verify_eraseblock2(int ebnum)
 		set_random_data(writebuf, subpgsize * k);
 		clear_data(readbuf, subpgsize * k);
 		read = 0;
-		err = mtd->read(mtd, addr, subpgsize * k, &read, readbuf);
+		err = mtd_read(mtd, addr, subpgsize * k, &read, readbuf);
 		if (unlikely(err || read != subpgsize * k)) {
 			if (mtd_is_bitflip(err) && read == subpgsize * k) {
 				printk(PRINT_PREF "ECC correction at %#llx\n",
@@ -296,7 +296,7 @@ static int verify_eraseblock_ff(int ebnum)
 	for (j = 0; j < mtd->erasesize / subpgsize; ++j) {
 		clear_data(readbuf, subpgsize);
 		read = 0;
-		err = mtd->read(mtd, addr, subpgsize, &read, readbuf);
+		err = mtd_read(mtd, addr, subpgsize, &read, readbuf);
 		if (unlikely(err || read != subpgsize)) {
 			if (mtd_is_bitflip(err) && read == subpgsize) {
 				printk(PRINT_PREF "ECC correction at %#llx\n",
diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c
index 102c79b7ac66..ecc68bf3f3f2 100644
--- a/drivers/mtd/tests/mtd_torturetest.c
+++ b/drivers/mtd/tests/mtd_torturetest.c
@@ -137,7 +137,7 @@ static inline int check_eraseblock(int ebnum, unsigned char *buf)
 	}
 
 retry:
-	err = mtd->read(mtd, addr, len, &read, check_buf);
+	err = mtd_read(mtd, addr, len, &read, check_buf);
 	if (mtd_is_bitflip(err))
 		printk(PRINT_PREF "single bit flip occurred at EB %d "
 		       "MTD reported that it was fixed.\n", ebnum);
diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
index ab80c0debac8..e2cdebf40840 100644
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -216,7 +216,7 @@ void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len)
 	buf = vmalloc(len);
 	if (!buf)
 		return;
-	err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
+	err = mtd_read(ubi->mtd, addr, len, &read, buf);
 	if (err && err != -EUCLEAN) {
 		ubi_err("error %d while reading %d bytes from PEB %d:%d, "
 			"read %zd bytes", err, len, pnum, offset, read);
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index b6c8959e6c7e..433382951d3d 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -170,7 +170,7 @@ int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset,
 
 	addr = (loff_t)pnum * ubi->peb_size + offset;
 retry:
-	err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
+	err = mtd_read(ubi->mtd, addr, len, &read, buf);
 	if (err) {
 		const char *errstr = mtd_is_eccerr(err) ? " (ECC error)" : "";
 
@@ -1357,7 +1357,7 @@ int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum,
 		return 0;
 	}
 
-	err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf1);
+	err = mtd_read(ubi->mtd, addr, len, &read, buf1);
 	if (err && !mtd_is_bitflip(err))
 		goto out_free;
 
@@ -1421,7 +1421,7 @@ int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len)
 		return 0;
 	}
 
-	err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
+	err = mtd_read(ubi->mtd, addr, len, &read, buf);
 	if (err && !mtd_is_bitflip(err)) {
 		ubi_err("error %d while reading %d bytes from PEB %d:%d, "
 			"read %zd bytes", err, len, pnum, offset, read);
diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c
index d638fafab649..eccd08d0e009 100644
--- a/drivers/staging/spectra/lld_mtd.c
+++ b/drivers/staging/spectra/lld_mtd.c
@@ -283,9 +283,11 @@ u16 mtd_Read_Page_Main(u8 *read_data, u32 Block,
 
 
 	while (PageCount) {
-		ret = spectra_mtd->read(spectra_mtd,
-					(Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
-					DeviceInfo.wPageDataSize, &retlen, read_data);
+		ret = mtd_read(spectra_mtd,
+			       (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
+			       DeviceInfo.wPageDataSize,
+			       &retlen,
+			       read_data);
 		if (ret) {
 			printk(KERN_ERR "%s failed %d\n", __func__, ret);
 			return FAIL;
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index ffdf4fca9c54..c59d642cade2 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -381,7 +381,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
 
 		*bad_offset = ofs;
 
-		ret = c->mtd->read(c->mtd, ofs, readlen, &retlen, ebuf);
+		ret = mtd_read(c->mtd, ofs, readlen, &retlen, ebuf);
 		if (ret) {
 			printk(KERN_WARNING "Read of newly-erased block at 0x%08x failed: %d. Putting on bad_list\n", ofs, ret);
 			ret = -EIO;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index b09e51d2f81f..a24d3d21b63d 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -228,7 +228,7 @@ static int jffs2_verify_write(struct jffs2_sb_info *c, unsigned char *buf,
 	size_t retlen;
 	char *eccstr;
 
-	ret = c->mtd->read(c->mtd, ofs, c->wbuf_pagesize, &retlen, c->wbuf_verify);
+	ret = mtd_read(c->mtd, ofs, c->wbuf_pagesize, &retlen, c->wbuf_verify);
 	if (ret && ret != -EUCLEAN && ret != -EBADMSG) {
 		printk(KERN_WARNING "jffs2_verify_write(): Read back of page at %08x failed: %d\n", c->wbuf_ofs, ret);
 		return ret;
@@ -337,7 +337,8 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
 		}
 
 		/* Do the read... */
-		ret = c->mtd->read(c->mtd, start, c->wbuf_ofs - start, &retlen, buf);
+		ret = mtd_read(c->mtd, start, c->wbuf_ofs - start, &retlen,
+			       buf);
 
 		/* ECC recovered ? */
 		if ((ret == -EUCLEAN || ret == -EBADMSG) &&
@@ -948,11 +949,11 @@ int jffs2_flash_read(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *re
 	int	ret;
 
 	if (!jffs2_is_writebuffered(c))
-		return c->mtd->read(c->mtd, ofs, len, retlen, buf);
+		return mtd_read(c->mtd, ofs, len, retlen, buf);
 
 	/* Read flash */
 	down_read(&c->wbuf_sem);
-	ret = c->mtd->read(c->mtd, ofs, len, retlen, buf);
+	ret = mtd_read(c->mtd, ofs, len, retlen, buf);
 
 	if ( (ret == -EBADMSG || ret == -EUCLEAN) && (*retlen == len) ) {
 		if (ret == -EBADMSG)
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 046362894352..3ee64351685f 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -20,7 +20,7 @@ static int logfs_mtd_read(struct super_block *sb, loff_t ofs, size_t len,
 	size_t retlen;
 	int ret;
 
-	ret = mtd->read(mtd, ofs, len, &retlen, buf);
+	ret = mtd_read(mtd, ofs, len, &retlen, buf);
 	BUG_ON(ret == -EINVAL);
 	if (ret)
 		return ret;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index f38e8276b408..56478eb4bbc0 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -182,6 +182,8 @@ struct mtd_info {
 					    unsigned long len,
 					    unsigned long offset,
 					    unsigned long flags);
+	int (*read) (struct mtd_info *mtd, loff_t from, size_t len,
+		     size_t *retlen, u_char *buf);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
@@ -189,7 +191,6 @@ struct mtd_info {
 	struct backing_dev_info *backing_dev_info;
 
 
-	int (*read) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*write) (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf);
 
 	/* In blackbox flight recorder like scenarios we want to make successful
@@ -301,6 +302,12 @@ static inline unsigned long mtd_get_unmapped_area(struct mtd_info *mtd,
 	return mtd->get_unmapped_area(mtd, len, offset, flags);
 }
 
+static inline int mtd_read(struct mtd_info *mtd, loff_t from, size_t len,
+			   size_t *retlen, u_char *buf)
+{
+	return mtd->read(mtd, from, len, retlen, buf);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From eda95cbf75193808f62948fb0142ba0901d8bee2 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 17:35:41 +0200
Subject: mtd: introduce mtd_write interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_cmdset_0020.c |  8 +++++---
 drivers/mtd/ftl.c                   | 35 ++++++++++++++++++-----------------
 drivers/mtd/mtdblock.c              |  4 ++--
 drivers/mtd/mtdblock_ro.c           |  2 +-
 drivers/mtd/mtdchar.c               |  2 +-
 drivers/mtd/mtdconcat.c             |  2 +-
 drivers/mtd/mtdcore.c               |  3 ++-
 drivers/mtd/mtdoops.c               |  4 ++--
 drivers/mtd/mtdpart.c               |  3 +--
 drivers/mtd/mtdswap.c               |  2 +-
 drivers/mtd/rfd_ftl.c               | 17 ++++++++---------
 drivers/mtd/tests/mtd_pagetest.c    |  8 ++++----
 drivers/mtd/tests/mtd_speedtest.c   |  8 ++++----
 drivers/mtd/tests/mtd_stresstest.c  |  2 +-
 drivers/mtd/tests/mtd_subpagetest.c |  6 +++---
 drivers/mtd/tests/mtd_torturetest.c |  2 +-
 drivers/mtd/ubi/io.c                |  7 +++----
 drivers/staging/spectra/lld_mtd.c   |  8 +++++---
 fs/jffs2/wbuf.c                     | 18 +++++++++---------
 fs/jffs2/writev.c                   |  5 +++--
 fs/logfs/dev_mtd.c                  |  2 +-
 include/linux/mtd/mtd.h             |  9 ++++++++-
 22 files changed, 84 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c
index 666c52f8bf8d..85e80180b65b 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c
@@ -699,7 +699,8 @@ cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs,
 				continue;
 			}
 			memcpy(buffer+buflen, elem_base, ECCBUF_SIZE-buflen);
-			ret = mtd->write(mtd, to, ECCBUF_SIZE, &thislen, buffer);
+			ret = mtd_write(mtd, to, ECCBUF_SIZE, &thislen,
+					buffer);
 			totlen += thislen;
 			if (ret || thislen != ECCBUF_SIZE)
 				goto write_error;
@@ -708,7 +709,8 @@ cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs,
 			to += ECCBUF_SIZE;
 		}
 		if (ECCBUF_DIV(elem_len)) { /* write clean aligned data */
-			ret = mtd->write(mtd, to, ECCBUF_DIV(elem_len), &thislen, elem_base);
+			ret = mtd_write(mtd, to, ECCBUF_DIV(elem_len),
+					&thislen, elem_base);
 			totlen += thislen;
 			if (ret || thislen != ECCBUF_DIV(elem_len))
 				goto write_error;
@@ -722,7 +724,7 @@ cfi_staa_writev(struct mtd_info *mtd, const struct kvec *vecs,
 	}
 	if (buflen) { /* flush last page, even if not full */
 		/* This is sometimes intended behaviour, really */
-		ret = mtd->write(mtd, to, buflen, &thislen, buffer);
+		ret = mtd_write(mtd, to, buflen, &thislen, buffer);
 		totlen += thislen;
 		if (ret || thislen != ECCBUF_SIZE)
 			goto write_error;
diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index 12fd7ebd3fd8..d591b1d0a6c1 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -422,8 +422,8 @@ static int prepare_xfer(partition_t *part, int i)
     header.LogicalEUN = cpu_to_le16(0xffff);
     header.EraseCount = cpu_to_le32(xfer->EraseCount);
 
-    ret = part->mbd.mtd->write(part->mbd.mtd, xfer->Offset, sizeof(header),
-			   &retlen, (u_char *)&header);
+    ret = mtd_write(part->mbd.mtd, xfer->Offset, sizeof(header), &retlen,
+                    (u_char *)&header);
 
     if (ret) {
 	return ret;
@@ -438,8 +438,8 @@ static int prepare_xfer(partition_t *part, int i)
 
     for (i = 0; i < nbam; i++, offset += sizeof(uint32_t)) {
 
-	ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(uint32_t),
-			       &retlen, (u_char *)&ctl);
+	ret = mtd_write(part->mbd.mtd, offset, sizeof(uint32_t), &retlen,
+                        (u_char *)&ctl);
 
 	if (ret)
 	    return ret;
@@ -503,8 +503,8 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit,
     offset = xfer->Offset + 20; /* Bad! */
     unit = cpu_to_le16(0x7fff);
 
-    ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(uint16_t),
-			   &retlen, (u_char *) &unit);
+    ret = mtd_write(part->mbd.mtd, offset, sizeof(uint16_t), &retlen,
+                    (u_char *)&unit);
 
     if (ret) {
 	printk( KERN_WARNING "ftl: Failed to write back to BAM cache in copy_erase_unit()!\n");
@@ -531,8 +531,8 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit,
             }
 
 
-	    ret = part->mbd.mtd->write(part->mbd.mtd, dest, SECTOR_SIZE,
-                        &retlen, (u_char *) buf);
+	    ret = mtd_write(part->mbd.mtd, dest, SECTOR_SIZE, &retlen,
+                            (u_char *)buf);
 	    if (ret)  {
 		printk(KERN_WARNING "ftl: Error writing new xfer unit in copy_erase_unit\n");
 		return ret;
@@ -550,9 +550,11 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit,
     }
 
     /* Write the BAM to the transfer unit */
-    ret = part->mbd.mtd->write(part->mbd.mtd, xfer->Offset + le32_to_cpu(part->header.BAMOffset),
-                    part->BlocksPerUnit * sizeof(int32_t), &retlen,
-		    (u_char *)part->bam_cache);
+    ret = mtd_write(part->mbd.mtd,
+                    xfer->Offset + le32_to_cpu(part->header.BAMOffset),
+                    part->BlocksPerUnit * sizeof(int32_t),
+                    &retlen,
+                    (u_char *)part->bam_cache);
     if (ret) {
 	printk( KERN_WARNING "ftl: Error writing BAM in copy_erase_unit\n");
 	return ret;
@@ -560,8 +562,8 @@ static int copy_erase_unit(partition_t *part, uint16_t srcunit,
 
 
     /* All clear? Then update the LogicalEUN again */
-    ret = part->mbd.mtd->write(part->mbd.mtd, xfer->Offset + 20, sizeof(uint16_t),
-			   &retlen, (u_char *)&srcunitswap);
+    ret = mtd_write(part->mbd.mtd, xfer->Offset + 20, sizeof(uint16_t),
+                    &retlen, (u_char *)&srcunitswap);
 
     if (ret) {
 	printk(KERN_WARNING "ftl: Error writing new LogicalEUN in copy_erase_unit\n");
@@ -887,8 +889,8 @@ static int set_bam_entry(partition_t *part, uint32_t log_addr,
 #endif
 	part->bam_cache[blk] = le_virt_addr;
     }
-    ret = part->mbd.mtd->write(part->mbd.mtd, offset, sizeof(uint32_t),
-                            &retlen, (u_char *)&le_virt_addr);
+    ret = mtd_write(part->mbd.mtd, offset, sizeof(uint32_t), &retlen,
+                    (u_char *)&le_virt_addr);
 
     if (ret) {
 	printk(KERN_NOTICE "ftl_cs: set_bam_entry() failed!\n");
@@ -947,8 +949,7 @@ static int ftl_write(partition_t *part, caddr_t buffer,
 	part->EUNInfo[part->bam_index].Deleted++;
 	offset = (part->EUNInfo[part->bam_index].Offset +
 		      blk * SECTOR_SIZE);
-	ret = part->mbd.mtd->write(part->mbd.mtd, offset, SECTOR_SIZE, &retlen,
-                                     buffer);
+	ret = mtd_write(part->mbd.mtd, offset, SECTOR_SIZE, &retlen, buffer);
 
 	if (ret) {
 	    printk(KERN_NOTICE "ftl_cs: block write failed!\n");
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index b0644d2d2a6e..ac7f1f1faa2d 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -102,7 +102,7 @@ static int erase_write (struct mtd_info *mtd, unsigned long pos,
 	 * Next, write the data to flash.
 	 */
 
-	ret = mtd->write(mtd, pos, len, &retlen, buf);
+	ret = mtd_write(mtd, pos, len, &retlen, buf);
 	if (ret)
 		return ret;
 	if (retlen != len)
@@ -152,7 +152,7 @@ static int do_cached_write (struct mtdblk_dev *mtdblk, unsigned long pos,
 		mtd->name, pos, len);
 
 	if (!sect_size)
-		return mtd->write(mtd, pos, len, &retlen, buf);
+		return mtd_write(mtd, pos, len, &retlen, buf);
 
 	while (len > 0) {
 		unsigned long sect_start = (pos/sect_size)*sect_size;
diff --git a/drivers/mtd/mtdblock_ro.c b/drivers/mtd/mtdblock_ro.c
index f5737b1153fa..92759a9d2985 100644
--- a/drivers/mtd/mtdblock_ro.c
+++ b/drivers/mtd/mtdblock_ro.c
@@ -40,7 +40,7 @@ static int mtdblock_writesect(struct mtd_blktrans_dev *dev,
 {
 	size_t retlen;
 
-	if (dev->mtd->write(dev->mtd, (block * 512), 512, &retlen, buf))
+	if (mtd_write(dev->mtd, (block * 512), 512, &retlen, buf))
 		return 1;
 	return 0;
 }
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index c7f484687fa3..922da31d2c6b 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -331,7 +331,7 @@ static ssize_t mtdchar_write(struct file *file, const char __user *buf, size_t c
 		}
 
 		default:
-			ret = (*(mtd->write))(mtd, *ppos, len, &retlen, kbuf);
+			ret = mtd_write(mtd, *ppos, len, &retlen, kbuf);
 		}
 		if (!ret) {
 			*ppos += retlen;
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 45460349fd12..45215501c4c7 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -148,7 +148,7 @@ concat_write(struct mtd_info *mtd, loff_t to, size_t len,
 		if (!(subdev->flags & MTD_WRITEABLE))
 			err = -EROFS;
 		else
-			err = subdev->write(subdev, to, size, &retsize, buf);
+			err = mtd_write(subdev, to, size, &retsize, buf);
 
 		if (err)
 			break;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index b01993ea260e..e36191ab47c3 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -699,7 +699,8 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 		for (i=0; i<count; i++) {
 			if (!vecs[i].iov_len)
 				continue;
-			ret = mtd->write(mtd, to, vecs[i].iov_len, &thislen, vecs[i].iov_base);
+			ret = mtd_write(mtd, to, vecs[i].iov_len, &thislen,
+					vecs[i].iov_base);
 			totlen += thislen;
 			if (ret || thislen != vecs[i].iov_len)
 				break;
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 23629ad08507..9c9d58617c98 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -225,8 +225,8 @@ static void mtdoops_write(struct mtdoops_context *cxt, int panic)
 		ret = mtd->panic_write(mtd, cxt->nextpage * record_size,
 					record_size, &retlen, cxt->oops_buf);
 	else
-		ret = mtd->write(mtd, cxt->nextpage * record_size,
-					record_size, &retlen, cxt->oops_buf);
+		ret = mtd_write(mtd, cxt->nextpage * record_size,
+				record_size, &retlen, cxt->oops_buf);
 
 	if (retlen != record_size || ret < 0)
 		printk(KERN_ERR "mtdoops: write failure at %ld (%td of %ld written), error %d\n",
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 59cd7974bc50..96574a036567 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -188,8 +188,7 @@ static int part_write(struct mtd_info *mtd, loff_t to, size_t len,
 		len = 0;
 	else if (to + len > mtd->size)
 		len = mtd->size - to;
-	return part->master->write(part->master, to + part->offset,
-				    len, retlen, buf);
+	return mtd_write(part->master, to + part->offset, len, retlen, buf);
 }
 
 static int part_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index b3282d2aa8f8..6ff823e29c0c 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -689,7 +689,7 @@ retry:
 		return ret;
 
 	writepos = (loff_t)*bp << PAGE_SHIFT;
-	ret =  mtd->write(mtd, writepos, PAGE_SIZE, &retlen, buf);
+	ret =  mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf);
 	if (ret == -EIO || mtd_is_eccerr(ret)) {
 		d->curr_write_pos--;
 		eb->active_count--;
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index d9fe2d0533d9..c594bb7abfa3 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -304,9 +304,8 @@ static void erase_callback(struct erase_info *erase)
 	part->blocks[i].used_sectors = 0;
 	part->blocks[i].erases++;
 
-	rc = part->mbd.mtd->write(part->mbd.mtd,
-		part->blocks[i].offset, sizeof(magic), &retlen,
-		(u_char*)&magic);
+	rc = mtd_write(part->mbd.mtd, part->blocks[i].offset, sizeof(magic),
+		       &retlen, (u_char *)&magic);
 
 	if (!rc && retlen != sizeof(magic))
 		rc = -EIO;
@@ -595,8 +594,8 @@ static int mark_sector_deleted(struct partition *part, u_long old_addr)
 
 	addr = part->blocks[block].offset +
 			(HEADER_MAP_OFFSET + offset) * sizeof(u16);
-	rc = part->mbd.mtd->write(part->mbd.mtd, addr,
-		sizeof(del), &retlen, (u_char*)&del);
+	rc = mtd_write(part->mbd.mtd, addr, sizeof(del), &retlen,
+		       (u_char *)&del);
 
 	if (!rc && retlen != sizeof(del))
 		rc = -EIO;
@@ -668,8 +667,8 @@ static int do_writesect(struct mtd_blktrans_dev *dev, u_long sector, char *buf,
 
 	addr = (i + part->header_sectors_per_block) * SECTOR_SIZE +
 		block->offset;
-	rc = part->mbd.mtd->write(part->mbd.mtd,
-		addr, SECTOR_SIZE, &retlen, (u_char*)buf);
+	rc = mtd_write(part->mbd.mtd, addr, SECTOR_SIZE, &retlen,
+		       (u_char *)buf);
 
 	if (!rc && retlen != SECTOR_SIZE)
 		rc = -EIO;
@@ -688,8 +687,8 @@ static int do_writesect(struct mtd_blktrans_dev *dev, u_long sector, char *buf,
 	part->header_cache[i + HEADER_MAP_OFFSET] = entry;
 
 	addr = block->offset + (HEADER_MAP_OFFSET + i) * sizeof(u16);
-	rc = part->mbd.mtd->write(part->mbd.mtd, addr,
-			sizeof(entry), &retlen, (u_char*)&entry);
+	rc = mtd_write(part->mbd.mtd, addr, sizeof(entry), &retlen,
+		       (u_char *)&entry);
 
 	if (!rc && retlen != sizeof(entry))
 		rc = -EIO;
diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c
index 6d62e24a03ed..83da97e54f97 100644
--- a/drivers/mtd/tests/mtd_pagetest.c
+++ b/drivers/mtd/tests/mtd_pagetest.c
@@ -100,7 +100,7 @@ static int write_eraseblock(int ebnum)
 
 	set_random_data(writebuf, mtd->erasesize);
 	cond_resched();
-	err = mtd->write(mtd, addr, mtd->erasesize, &written, writebuf);
+	err = mtd_write(mtd, addr, mtd->erasesize, &written, writebuf);
 	if (err || written != mtd->erasesize)
 		printk(PRINT_PREF "error: write failed at %#llx\n",
 		       (long long)addr);
@@ -335,7 +335,7 @@ static int erasecrosstest(void)
 	printk(PRINT_PREF "writing 1st page of block %d\n", ebnum);
 	set_random_data(writebuf, pgsize);
 	strcpy(writebuf, "There is no data like this!");
-	err = mtd->write(mtd, addr0, pgsize, &written, writebuf);
+	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
 	if (err || written != pgsize) {
 		printk(PRINT_PREF "error: write failed at %#llx\n",
 		       (long long)addr0);
@@ -368,7 +368,7 @@ static int erasecrosstest(void)
 	printk(PRINT_PREF "writing 1st page of block %d\n", ebnum);
 	set_random_data(writebuf, pgsize);
 	strcpy(writebuf, "There is no data like this!");
-	err = mtd->write(mtd, addr0, pgsize, &written, writebuf);
+	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
 	if (err || written != pgsize) {
 		printk(PRINT_PREF "error: write failed at %#llx\n",
 		       (long long)addr0);
@@ -425,7 +425,7 @@ static int erasetest(void)
 
 	printk(PRINT_PREF "writing 1st page of block %d\n", ebnum);
 	set_random_data(writebuf, pgsize);
-	err = mtd->write(mtd, addr0, pgsize, &written, writebuf);
+	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
 	if (err || written != pgsize) {
 		printk(PRINT_PREF "error: write failed at %#llx\n",
 		       (long long)addr0);
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
index 3c9529bd0a62..c7b18e189082 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/mtd_speedtest.c
@@ -143,7 +143,7 @@ static int write_eraseblock(int ebnum)
 	int err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 
-	err = mtd->write(mtd, addr, mtd->erasesize, &written, iobuf);
+	err = mtd_write(mtd, addr, mtd->erasesize, &written, iobuf);
 	if (err || written != mtd->erasesize) {
 		printk(PRINT_PREF "error: write failed at %#llx\n", addr);
 		if (!err)
@@ -161,7 +161,7 @@ static int write_eraseblock_by_page(int ebnum)
 	void *buf = iobuf;
 
 	for (i = 0; i < pgcnt; i++) {
-		err = mtd->write(mtd, addr, pgsize, &written, buf);
+		err = mtd_write(mtd, addr, pgsize, &written, buf);
 		if (err || written != pgsize) {
 			printk(PRINT_PREF "error: write failed at %#llx\n",
 			       addr);
@@ -184,7 +184,7 @@ static int write_eraseblock_by_2pages(int ebnum)
 	void *buf = iobuf;
 
 	for (i = 0; i < n; i++) {
-		err = mtd->write(mtd, addr, sz, &written, buf);
+		err = mtd_write(mtd, addr, sz, &written, buf);
 		if (err || written != sz) {
 			printk(PRINT_PREF "error: write failed at %#llx\n",
 			       addr);
@@ -196,7 +196,7 @@ static int write_eraseblock_by_2pages(int ebnum)
 		buf += sz;
 	}
 	if (pgcnt % 2) {
-		err = mtd->write(mtd, addr, pgsize, &written, buf);
+		err = mtd_write(mtd, addr, pgsize, &written, buf);
 		if (err || written != pgsize) {
 			printk(PRINT_PREF "error: write failed at %#llx\n",
 			       addr);
diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c
index 83a843723880..f8aac4b7e59a 100644
--- a/drivers/mtd/tests/mtd_stresstest.c
+++ b/drivers/mtd/tests/mtd_stresstest.c
@@ -192,7 +192,7 @@ static int do_write(void)
 		}
 	}
 	addr = eb * mtd->erasesize + offs;
-	err = mtd->write(mtd, addr, len, &written, writebuf);
+	err = mtd_write(mtd, addr, len, &written, writebuf);
 	if (unlikely(err || written != len)) {
 		printk(PRINT_PREF "error: write failed at 0x%llx\n",
 		       (long long)addr);
diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c
index d81f89a19daa..b90c01036b49 100644
--- a/drivers/mtd/tests/mtd_subpagetest.c
+++ b/drivers/mtd/tests/mtd_subpagetest.c
@@ -120,7 +120,7 @@ static int write_eraseblock(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 
 	set_random_data(writebuf, subpgsize);
-	err = mtd->write(mtd, addr, subpgsize, &written, writebuf);
+	err = mtd_write(mtd, addr, subpgsize, &written, writebuf);
 	if (unlikely(err || written != subpgsize)) {
 		printk(PRINT_PREF "error: write failed at %#llx\n",
 		       (long long)addr);
@@ -134,7 +134,7 @@ static int write_eraseblock(int ebnum)
 	addr += subpgsize;
 
 	set_random_data(writebuf, subpgsize);
-	err = mtd->write(mtd, addr, subpgsize, &written, writebuf);
+	err = mtd_write(mtd, addr, subpgsize, &written, writebuf);
 	if (unlikely(err || written != subpgsize)) {
 		printk(PRINT_PREF "error: write failed at %#llx\n",
 		       (long long)addr);
@@ -158,7 +158,7 @@ static int write_eraseblock2(int ebnum)
 		if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize)
 			break;
 		set_random_data(writebuf, subpgsize * k);
-		err = mtd->write(mtd, addr, subpgsize * k, &written, writebuf);
+		err = mtd_write(mtd, addr, subpgsize * k, &written, writebuf);
 		if (unlikely(err || written != subpgsize * k)) {
 			printk(PRINT_PREF "error: write failed at %#llx\n",
 			       (long long)addr);
diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c
index ecc68bf3f3f2..dd34a519fa7a 100644
--- a/drivers/mtd/tests/mtd_torturetest.c
+++ b/drivers/mtd/tests/mtd_torturetest.c
@@ -189,7 +189,7 @@ static inline int write_pattern(int ebnum, void *buf)
 		addr = (ebnum + 1) * mtd->erasesize - pgcnt * pgsize;
 		len = pgcnt * pgsize;
 	}
-	err = mtd->write(mtd, addr, len, &written, buf);
+	err = mtd_write(mtd, addr, len, &written, buf);
 	if (err) {
 		printk(PRINT_PREF "error %d while writing EB %d, written %zd"
 		      " bytes\n", err, ebnum, written);
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 433382951d3d..8d832fc9e9e4 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -289,7 +289,7 @@ int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset,
 	}
 
 	addr = (loff_t)pnum * ubi->peb_size + offset;
-	err = ubi->mtd->write(ubi->mtd, addr, len, &written, buf);
+	err = mtd_write(ubi->mtd, addr, len, &written, buf);
 	if (err) {
 		ubi_err("error %d while writing %d bytes to PEB %d:%d, written "
 			"%zd bytes", err, len, pnum, offset, written);
@@ -525,11 +525,10 @@ static int nor_erase_prepare(struct ubi_device *ubi, int pnum)
 	 * the header comment in scan.c for more information).
 	 */
 	addr = (loff_t)pnum * ubi->peb_size;
-	err = ubi->mtd->write(ubi->mtd, addr, 4, &written, (void *)&data);
+	err = mtd_write(ubi->mtd, addr, 4, &written, (void *)&data);
 	if (!err) {
 		addr += ubi->vid_hdr_aloffset;
-		err = ubi->mtd->write(ubi->mtd, addr, 4, &written,
-				      (void *)&data);
+		err = mtd_write(ubi->mtd, addr, 4, &written, (void *)&data);
 		if (!err)
 			return 0;
 	}
diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c
index eccd08d0e009..2eb032131960 100644
--- a/drivers/staging/spectra/lld_mtd.c
+++ b/drivers/staging/spectra/lld_mtd.c
@@ -233,9 +233,11 @@ u16 mtd_Write_Page_Main(u8 *write_data, u32 Block,
 
 
 	while (PageCount) {
-		ret = spectra_mtd->write(spectra_mtd,
-					 (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
-					 DeviceInfo.wPageDataSize, &retlen, write_data);
+		ret = mtd_write(spectra_mtd,
+				(Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
+				DeviceInfo.wPageDataSize,
+				&retlen,
+				write_data);
 		if (ret) {
 			printk(KERN_ERR "%s failed %d\n", __func__, ret);
 			return FAIL;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index a24d3d21b63d..3ea2f8db9358 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -414,13 +414,12 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
 		if (breakme++ == 20) {
 			printk(KERN_NOTICE "Faking write error at 0x%08x\n", ofs);
 			breakme = 0;
-			c->mtd->write(c->mtd, ofs, towrite, &retlen,
-				      brokenbuf);
+			mtd_write(c->mtd, ofs, towrite, &retlen, brokenbuf);
 			ret = -EIO;
 		} else
 #endif
-			ret = c->mtd->write(c->mtd, ofs, towrite, &retlen,
-					    rewrite_buf);
+			ret = mtd_write(c->mtd, ofs, towrite, &retlen,
+					rewrite_buf);
 
 		if (ret || retlen != towrite || jffs2_verify_write(c, rewrite_buf, ofs)) {
 			/* Argh. We tried. Really we did. */
@@ -620,13 +619,14 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
 	if (breakme++ == 20) {
 		printk(KERN_NOTICE "Faking write error at 0x%08x\n", c->wbuf_ofs);
 		breakme = 0;
-		c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen,
-			      brokenbuf);
+		mtd_write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen,
+			  brokenbuf);
 		ret = -EIO;
 	} else
 #endif
 
-		ret = c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf);
+		ret = mtd_write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize,
+				&retlen, c->wbuf);
 
 	if (ret) {
 		printk(KERN_WARNING "jffs2_flush_wbuf(): Write failed with %d\n", ret);
@@ -862,8 +862,8 @@ int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs,
 		v += wbuf_retlen;
 
 		if (vlen >= c->wbuf_pagesize) {
-			ret = c->mtd->write(c->mtd, outvec_to, PAGE_DIV(vlen),
-					    &wbuf_retlen, v);
+			ret = mtd_write(c->mtd, outvec_to, PAGE_DIV(vlen),
+					&wbuf_retlen, v);
 			if (ret < 0 || wbuf_retlen != PAGE_DIV(vlen))
 				goto outfile;
 
diff --git a/fs/jffs2/writev.c b/fs/jffs2/writev.c
index b9276b11bac6..b05710fd552a 100644
--- a/fs/jffs2/writev.c
+++ b/fs/jffs2/writev.c
@@ -26,7 +26,8 @@ static inline int mtd_fake_writev(struct mtd_info *mtd, const struct kvec *vecs,
 	for (i=0; i<count; i++) {
 		if (!vecs[i].iov_len)
 			continue;
-		ret = mtd->write(mtd, to, vecs[i].iov_len, &thislen, vecs[i].iov_base);
+		ret = mtd_write(mtd, to, vecs[i].iov_len, &thislen,
+				vecs[i].iov_base);
 		totlen += thislen;
 		if (ret || thislen != vecs[i].iov_len)
 			break;
@@ -61,7 +62,7 @@ int jffs2_flash_direct_write(struct jffs2_sb_info *c, loff_t ofs, size_t len,
 			size_t *retlen, const u_char *buf)
 {
 	int ret;
-	ret = c->mtd->write(c->mtd, ofs, len, retlen, buf);
+	ret = mtd_write(c->mtd, ofs, len, retlen, buf);
 
 	if (jffs2_sum_active()) {
 		struct kvec vecs[1];
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 3ee64351685f..1842440d6564 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -49,7 +49,7 @@ static int loffs_mtd_write(struct super_block *sb, loff_t ofs, size_t len,
 	BUG_ON(len > PAGE_CACHE_SIZE);
 	page_start = ofs & PAGE_CACHE_MASK;
 	page_end = PAGE_CACHE_ALIGN(ofs + len) - 1;
-	ret = mtd->write(mtd, ofs, len, &retlen, buf);
+	ret = mtd_write(mtd, ofs, len, &retlen, buf);
 	if (ret || (retlen != len))
 		return -EIO;
 
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 56478eb4bbc0..1da7f4a6ef88 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -184,6 +184,8 @@ struct mtd_info {
 					    unsigned long flags);
 	int (*read) (struct mtd_info *mtd, loff_t from, size_t len,
 		     size_t *retlen, u_char *buf);
+	int (*write) (struct mtd_info *mtd, loff_t to, size_t len,
+		      size_t *retlen, const u_char *buf);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
@@ -191,7 +193,6 @@ struct mtd_info {
 	struct backing_dev_info *backing_dev_info;
 
 
-	int (*write) (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf);
 
 	/* In blackbox flight recorder like scenarios we want to make successful
 	   writes in interrupt context. panic_write() is only intended to be
@@ -308,6 +309,12 @@ static inline int mtd_read(struct mtd_info *mtd, loff_t from, size_t len,
 	return mtd->read(mtd, from, len, retlen, buf);
 }
 
+static inline int mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
+			    size_t *retlen, const u_char *buf)
+{
+	return mtd->write(mtd, to, len, retlen, buf);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 7ae79d7ff1769a3e9c47076b46e4eaa11204a2ee Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:03:17 +0200
Subject: mtd: introduce mtd_panic_write interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdoops.c   |  4 ++--
 drivers/mtd/mtdpart.c   |  4 ++--
 include/linux/mtd/mtd.h | 25 +++++++++++++++----------
 3 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 9c9d58617c98..7be2018ffbcc 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -222,8 +222,8 @@ static void mtdoops_write(struct mtdoops_context *cxt, int panic)
 	hdr[1] = MTDOOPS_KERNMSG_MAGIC;
 
 	if (panic)
-		ret = mtd->panic_write(mtd, cxt->nextpage * record_size,
-					record_size, &retlen, cxt->oops_buf);
+		ret = mtd_panic_write(mtd, cxt->nextpage * record_size,
+				      record_size, &retlen, cxt->oops_buf);
 	else
 		ret = mtd_write(mtd, cxt->nextpage * record_size,
 				record_size, &retlen, cxt->oops_buf);
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 96574a036567..9ed58f7d7466 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -201,8 +201,8 @@ static int part_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
 		len = 0;
 	else if (to + len > mtd->size)
 		len = mtd->size - to;
-	return part->master->panic_write(part->master, to + part->offset,
-				    len, retlen, buf);
+	return mtd_panic_write(part->master, to + part->offset, len, retlen,
+			       buf);
 }
 
 static int part_write_oob(struct mtd_info *mtd, loff_t to,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 1da7f4a6ef88..2fb83cd3d264 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -186,6 +186,8 @@ struct mtd_info {
 		     size_t *retlen, u_char *buf);
 	int (*write) (struct mtd_info *mtd, loff_t to, size_t len,
 		      size_t *retlen, const u_char *buf);
+	int (*panic_write) (struct mtd_info *mtd, loff_t to, size_t len,
+			    size_t *retlen, const u_char *buf);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
@@ -193,16 +195,6 @@ struct mtd_info {
 	struct backing_dev_info *backing_dev_info;
 
 
-
-	/* In blackbox flight recorder like scenarios we want to make successful
-	   writes in interrupt context. panic_write() is only intended to be
-	   called when its known the kernel is about to panic and we need the
-	   write to succeed. Since the kernel is not going to be running for much
-	   longer, this function can break locks and delay to ensure the write
-	   succeeds (but not sleep). */
-
-	int (*panic_write) (struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf);
-
 	int (*read_oob) (struct mtd_info *mtd, loff_t from,
 			 struct mtd_oob_ops *ops);
 	int (*write_oob) (struct mtd_info *mtd, loff_t to,
@@ -315,6 +307,19 @@ static inline int mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
 	return mtd->write(mtd, to, len, retlen, buf);
 }
 
+/*
+ * In blackbox flight recorder like scenarios we want to make successful writes
+ * in interrupt context. panic_write() is only intended to be called when its
+ * known the kernel is about to panic and we need the write to succeed. Since
+ * the kernel is not going to be running for much longer, this function can
+ * break locks and delay to ensure the write succeeds (but not sleep).
+ */
+static inline int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
+				  size_t *retlen, const u_char *buf)
+{
+	return mtd->panic_write(mtd, to, len, retlen, buf);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From fd2819bbc92fc98bed5d612e4acbe16b6326f6bf Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:27:05 +0200
Subject: mtd: introduce mtd_read_oob interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/inftlcore.c           |  2 +-
 drivers/mtd/mtdchar.c             |  4 ++--
 drivers/mtd/mtdconcat.c           |  2 +-
 drivers/mtd/mtdpart.c             |  2 +-
 drivers/mtd/mtdswap.c             |  4 ++--
 drivers/mtd/nand/nand_bbt.c       |  6 +++---
 drivers/mtd/nftlcore.c            |  2 +-
 drivers/mtd/sm_ftl.c              |  2 +-
 drivers/mtd/ssfdc.c               |  2 +-
 drivers/mtd/tests/mtd_oobtest.c   | 14 +++++++-------
 drivers/mtd/tests/mtd_readtest.c  |  2 +-
 drivers/staging/spectra/lld_mtd.c | 12 ++++++------
 fs/jffs2/wbuf.c                   |  4 ++--
 include/linux/mtd/mtd.h           | 10 ++++++++--
 14 files changed, 37 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index 0b038bed7b9c..07646e1273e2 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -158,7 +158,7 @@ int inftl_read_oob(struct mtd_info *mtd, loff_t offs, size_t len,
 	ops.oobbuf = buf;
 	ops.datbuf = NULL;
 
-	res = mtd->read_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
+	res = mtd_read_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
 	*retlen = ops.oobretlen;
 	return res;
 }
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 922da31d2c6b..e74f570a7b93 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -227,7 +227,7 @@ static ssize_t mtdchar_read(struct file *file, char __user *buf, size_t count,
 			ops.oobbuf = NULL;
 			ops.len = len;
 
-			ret = mtd->read_oob(mtd, *ppos, &ops);
+			ret = mtd_read_oob(mtd, *ppos, &ops);
 			retlen = ops.retlen;
 			break;
 		}
@@ -471,7 +471,7 @@ static int mtdchar_readoob(struct file *file, struct mtd_info *mtd,
 		return -ENOMEM;
 
 	start &= ~((uint64_t)mtd->writesize - 1);
-	ret = mtd->read_oob(mtd, start, &ops);
+	ret = mtd_read_oob(mtd, start, &ops);
 
 	if (put_user(ops.oobretlen, retp))
 		ret = -EFAULT;
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 45215501c4c7..cf35642e5f49 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -273,7 +273,7 @@ concat_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops)
 		if (from + devops.len > subdev->size)
 			devops.len = subdev->size - from;
 
-		err = subdev->read_oob(subdev, from, &devops);
+		err = mtd_read_oob(subdev, from, &devops);
 		ops->retlen += devops.retlen;
 		ops->oobretlen += devops.oobretlen;
 
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 9ed58f7d7466..6fdc74ef19c1 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -138,7 +138,7 @@ static int part_read_oob(struct mtd_info *mtd, loff_t from,
 			return -EINVAL;
 	}
 
-	res = part->master->read_oob(part->master, from + part->offset, ops);
+	res = mtd_read_oob(part->master, from + part->offset, ops);
 	if (unlikely(res)) {
 		if (mtd_is_bitflip(res))
 			mtd->ecc_stats.corrected++;
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 6ff823e29c0c..0f0ab18d4405 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -312,7 +312,7 @@ static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb)
 static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from,
 			struct mtd_oob_ops *ops)
 {
-	int ret = d->mtd->read_oob(d->mtd, from, ops);
+	int ret = mtd_read_oob(d->mtd, from, ops);
 
 	if (mtd_is_bitflip(ret))
 		return ret;
@@ -955,7 +955,7 @@ static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
 
 		pos = base;
 		for (i = 0; i < mtd_pages; i++) {
-			ret = mtd->read_oob(mtd, pos, &ops);
+			ret = mtd_read_oob(mtd, pos, &ops);
 			if (ret)
 				goto error;
 
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 1bcd6bc6798c..fcab50e80b90 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -317,7 +317,7 @@ static int scan_read_raw_oob(struct mtd_info *mtd, uint8_t *buf, loff_t offs,
 		ops.len = min(len, (size_t)mtd->writesize);
 		ops.oobbuf = buf + ops.len;
 
-		res = mtd->read_oob(mtd, offs, &ops);
+		res = mtd_read_oob(mtd, offs, &ops);
 
 		if (res)
 			return res;
@@ -434,7 +434,7 @@ static int scan_block_fast(struct mtd_info *mtd, struct nand_bbt_descr *bd,
 		 * Read the full oob until read_oob is fixed to handle single
 		 * byte reads for 16 bit buswidth.
 		 */
-		ret = mtd->read_oob(mtd, offs, &ops);
+		ret = mtd_read_oob(mtd, offs, &ops);
 		/* Ignore ECC errors when checking for BBM */
 		if (ret && !mtd_is_bitflip_or_eccerr(ret))
 			return ret;
@@ -769,7 +769,7 @@ static int write_bbt(struct mtd_info *mtd, uint8_t *buf,
 			/* Read oob data */
 			ops.ooblen = (len >> this->page_shift) * mtd->oobsize;
 			ops.oobbuf = &buf[len];
-			res = mtd->read_oob(mtd, to + mtd->writesize, &ops);
+			res = mtd_read_oob(mtd, to + mtd->writesize, &ops);
 			if (res < 0 || ops.oobretlen != ops.ooblen)
 				goto outerr;
 
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index 1a9d9c1d3a74..7497f5efc26b 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -153,7 +153,7 @@ int nftl_read_oob(struct mtd_info *mtd, loff_t offs, size_t len,
 	ops.oobbuf = buf;
 	ops.datbuf = NULL;
 
-	res = mtd->read_oob(mtd, offs & ~mask, &ops);
+	res = mtd_read_oob(mtd, offs & ~mask, &ops);
 	*retlen = ops.oobretlen;
 	return res;
 }
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index 2f1acb1ab5e8..748aa4416691 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -278,7 +278,7 @@ again:
 
 	/* Unfortunately, oob read will _always_ succeed,
 		despite card removal..... */
-	ret = mtd->read_oob(mtd, sm_mkoffset(ftl, zone, block, boffset), &ops);
+	ret = mtd_read_oob(mtd, sm_mkoffset(ftl, zone, block, boffset), &ops);
 
 	/* Test for unknown errors */
 	if (ret != 0 && !mtd_is_bitflip_or_eccerr(ret)) {
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index 293e22a5710f..0e6881338357 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -175,7 +175,7 @@ static int read_raw_oob(struct mtd_info *mtd, loff_t offs, uint8_t *buf)
 	ops.oobbuf = buf;
 	ops.datbuf = NULL;
 
-	ret = mtd->read_oob(mtd, offs, &ops);
+	ret = mtd_read_oob(mtd, offs, &ops);
 	if (ret < 0 || ops.oobretlen != OOB_SIZE)
 		return -1;
 
diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c
index 7d52854c16dd..962d27a64e64 100644
--- a/drivers/mtd/tests/mtd_oobtest.c
+++ b/drivers/mtd/tests/mtd_oobtest.c
@@ -192,7 +192,7 @@ static int verify_eraseblock(int ebnum)
 		ops.ooboffs   = use_offset;
 		ops.datbuf    = NULL;
 		ops.oobbuf    = readbuf;
-		err = mtd->read_oob(mtd, addr, &ops);
+		err = mtd_read_oob(mtd, addr, &ops);
 		if (err || ops.oobretlen != use_len) {
 			printk(PRINT_PREF "error: readoob failed at %#llx\n",
 			       (long long)addr);
@@ -219,7 +219,7 @@ static int verify_eraseblock(int ebnum)
 			ops.ooboffs   = 0;
 			ops.datbuf    = NULL;
 			ops.oobbuf    = readbuf;
-			err = mtd->read_oob(mtd, addr, &ops);
+			err = mtd_read_oob(mtd, addr, &ops);
 			if (err || ops.oobretlen != mtd->ecclayout->oobavail) {
 				printk(PRINT_PREF "error: readoob failed at "
 				       "%#llx\n", (long long)addr);
@@ -284,7 +284,7 @@ static int verify_eraseblock_in_one_go(int ebnum)
 	ops.ooboffs   = 0;
 	ops.datbuf    = NULL;
 	ops.oobbuf    = readbuf;
-	err = mtd->read_oob(mtd, addr, &ops);
+	err = mtd_read_oob(mtd, addr, &ops);
 	if (err || ops.oobretlen != len) {
 		printk(PRINT_PREF "error: readoob failed at %#llx\n",
 		       (long long)addr);
@@ -544,7 +544,7 @@ static int __init mtd_oobtest_init(void)
 	ops.oobbuf    = readbuf;
 	printk(PRINT_PREF "attempting to start read past end of OOB\n");
 	printk(PRINT_PREF "an error is expected...\n");
-	err = mtd->read_oob(mtd, addr0, &ops);
+	err = mtd_read_oob(mtd, addr0, &ops);
 	if (err) {
 		printk(PRINT_PREF "error occurred as expected\n");
 		err = 0;
@@ -588,7 +588,7 @@ static int __init mtd_oobtest_init(void)
 		ops.oobbuf    = readbuf;
 		printk(PRINT_PREF "attempting to read past end of device\n");
 		printk(PRINT_PREF "an error is expected...\n");
-		err = mtd->read_oob(mtd, mtd->size - mtd->writesize, &ops);
+		err = mtd_read_oob(mtd, mtd->size - mtd->writesize, &ops);
 		if (err) {
 			printk(PRINT_PREF "error occurred as expected\n");
 			err = 0;
@@ -632,7 +632,7 @@ static int __init mtd_oobtest_init(void)
 		ops.oobbuf    = readbuf;
 		printk(PRINT_PREF "attempting to read past end of device\n");
 		printk(PRINT_PREF "an error is expected...\n");
-		err = mtd->read_oob(mtd, mtd->size - mtd->writesize, &ops);
+		err = mtd_read_oob(mtd, mtd->size - mtd->writesize, &ops);
 		if (err) {
 			printk(PRINT_PREF "error occurred as expected\n");
 			err = 0;
@@ -698,7 +698,7 @@ static int __init mtd_oobtest_init(void)
 		ops.ooboffs   = 0;
 		ops.datbuf    = NULL;
 		ops.oobbuf    = readbuf;
-		err = mtd->read_oob(mtd, addr, &ops);
+		err = mtd_read_oob(mtd, addr, &ops);
 		if (err)
 			goto out;
 		if (memcmp(readbuf, writebuf, mtd->ecclayout->oobavail * 2)) {
diff --git a/drivers/mtd/tests/mtd_readtest.c b/drivers/mtd/tests/mtd_readtest.c
index 0c58d2976c76..5eaeada84284 100644
--- a/drivers/mtd/tests/mtd_readtest.c
+++ b/drivers/mtd/tests/mtd_readtest.c
@@ -74,7 +74,7 @@ static int read_eraseblock_by_page(int ebnum)
 			ops.ooboffs   = 0;
 			ops.datbuf    = NULL;
 			ops.oobbuf    = oobbuf;
-			ret = mtd->read_oob(mtd, addr, &ops);
+			ret = mtd_read_oob(mtd, addr, &ops);
 			if ((ret && !mtd_is_bitflip(ret)) ||
 					ops.oobretlen != mtd->oobsize) {
 				printk(PRINT_PREF "error: read oob failed at "
diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c
index 2eb032131960..ed8e5f067087 100644
--- a/drivers/staging/spectra/lld_mtd.c
+++ b/drivers/staging/spectra/lld_mtd.c
@@ -351,9 +351,9 @@ u16 mtd_Read_Page_Main_Spare(u8 *read_data, u32 Block,
 		ops.ooblen = BTSIG_BYTES;
 		ops.ooboffs = 0;
 
-		ret = spectra_mtd->read_oob(spectra_mtd,
-					    (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
-					    &ops);
+		ret = mtd_read_oob(spectra_mtd,
+				   (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
+				   &ops);
 		if (ret) {
 			printk(KERN_ERR "%s failed %d\n", __func__, ret);
 			return FAIL;
@@ -484,9 +484,9 @@ u16 mtd_Read_Page_Spare(u8 *read_data, u32 Block,
 		ops.ooblen = BTSIG_BYTES;
 		ops.ooboffs = 0;
 
-		ret = spectra_mtd->read_oob(spectra_mtd,
-					    (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
-					    &ops);
+		ret = mtd_read_oob(spectra_mtd,
+				   (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
+				   &ops);
 		if (ret) {
 			printk(KERN_ERR "%s failed %d\n", __func__, ret);
 			return FAIL;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 3ea2f8db9358..efc0cb370306 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1032,7 +1032,7 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c,
 	ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
 	ops.datbuf = NULL;
 
-	ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops);
+	ret = mtd_read_oob(c->mtd, jeb->offset, &ops);
 	if (ret || ops.oobretlen != ops.ooblen) {
 		printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd"
 				" bytes, read %zd bytes, error %d\n",
@@ -1075,7 +1075,7 @@ int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c,
 	ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
 	ops.datbuf = NULL;
 
-	ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops);
+	ret = mtd_read_oob(c->mtd, jeb->offset, &ops);
 	if (ret || ops.oobretlen != ops.ooblen) {
 		printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd"
 				" bytes, read %zd bytes, error %d\n",
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 2fb83cd3d264..0db8d87ce451 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -188,6 +188,8 @@ struct mtd_info {
 		      size_t *retlen, const u_char *buf);
 	int (*panic_write) (struct mtd_info *mtd, loff_t to, size_t len,
 			    size_t *retlen, const u_char *buf);
+	int (*read_oob) (struct mtd_info *mtd, loff_t from,
+			 struct mtd_oob_ops *ops);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
@@ -195,8 +197,6 @@ struct mtd_info {
 	struct backing_dev_info *backing_dev_info;
 
 
-	int (*read_oob) (struct mtd_info *mtd, loff_t from,
-			 struct mtd_oob_ops *ops);
 	int (*write_oob) (struct mtd_info *mtd, loff_t to,
 			 struct mtd_oob_ops *ops);
 
@@ -320,6 +320,12 @@ static inline int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
 	return mtd->panic_write(mtd, to, len, retlen, buf);
 }
 
+static inline int mtd_read_oob(struct mtd_info *mtd, loff_t from,
+			       struct mtd_oob_ops *ops)
+{
+	return mtd->read_oob(mtd, from, ops);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From a2cc5ba075f9bc837d0b4d4ec7328dcefc11859d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:29:55 +0200
Subject: mtd: introduce mtd_write_oob interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/inftlcore.c           |  4 ++--
 drivers/mtd/mtdchar.c             |  6 +++---
 drivers/mtd/mtdconcat.c           |  2 +-
 drivers/mtd/mtdpart.c             |  2 +-
 drivers/mtd/mtdswap.c             |  4 ++--
 drivers/mtd/nand/nand_bbt.c       |  2 +-
 drivers/mtd/nand/sm_common.c      |  2 +-
 drivers/mtd/nftlcore.c            |  4 ++--
 drivers/mtd/sm_ftl.c              |  2 +-
 drivers/mtd/tests/mtd_oobtest.c   | 10 +++++-----
 drivers/staging/spectra/lld_mtd.c |  6 +++---
 fs/jffs2/wbuf.c                   |  2 +-
 include/linux/mtd/mtd.h           | 12 ++++++++----
 13 files changed, 31 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index 07646e1273e2..28646c95cfb8 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -178,7 +178,7 @@ int inftl_write_oob(struct mtd_info *mtd, loff_t offs, size_t len,
 	ops.oobbuf = buf;
 	ops.datbuf = NULL;
 
-	res = mtd->write_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
+	res = mtd_write_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
 	*retlen = ops.oobretlen;
 	return res;
 }
@@ -199,7 +199,7 @@ static int inftl_write(struct mtd_info *mtd, loff_t offs, size_t len,
 	ops.datbuf = buf;
 	ops.len = len;
 
-	res = mtd->write_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
+	res = mtd_write_oob(mtd, offs & ~(mtd->writesize - 1), &ops);
 	*retlen = ops.retlen;
 	return res;
 }
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index e74f570a7b93..234e3d27143c 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -325,7 +325,7 @@ static ssize_t mtdchar_write(struct file *file, const char __user *buf, size_t c
 			ops.ooboffs = 0;
 			ops.len = len;
 
-			ret = mtd->write_oob(mtd, *ppos, &ops);
+			ret = mtd_write_oob(mtd, *ppos, &ops);
 			retlen = ops.retlen;
 			break;
 		}
@@ -426,7 +426,7 @@ static int mtdchar_writeoob(struct file *file, struct mtd_info *mtd,
 		return PTR_ERR(ops.oobbuf);
 
 	start &= ~((uint64_t)mtd->writesize - 1);
-	ret = mtd->write_oob(mtd, start, &ops);
+	ret = mtd_write_oob(mtd, start, &ops);
 
 	if (ops.oobretlen > 0xFFFFFFFFU)
 		ret = -EOVERFLOW;
@@ -609,7 +609,7 @@ static int mtdchar_write_ioctl(struct mtd_info *mtd,
 		ops.oobbuf = NULL;
 	}
 
-	ret = mtd->write_oob(mtd, (loff_t)req.start, &ops);
+	ret = mtd_write_oob(mtd, (loff_t)req.start, &ops);
 
 	kfree(ops.datbuf);
 	kfree(ops.oobbuf);
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index cf35642e5f49..3d9c1ffdbbbf 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -333,7 +333,7 @@ concat_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops)
 		if (to + devops.len > subdev->size)
 			devops.len = subdev->size - to;
 
-		err = subdev->write_oob(subdev, to, &devops);
+		err = mtd_write_oob(subdev, to, &devops);
 		ops->retlen += devops.oobretlen;
 		if (err)
 			return err;
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 6fdc74ef19c1..8a46cd2bb78f 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -217,7 +217,7 @@ static int part_write_oob(struct mtd_info *mtd, loff_t to,
 		return -EINVAL;
 	if (ops->datbuf && to + ops->len > mtd->size)
 		return -EINVAL;
-	return part->master->write_oob(part->master, to + part->offset, ops);
+	return mtd_write_oob(part->master, to + part->offset, ops);
 }
 
 static int part_write_user_prot_reg(struct mtd_info *mtd, loff_t from,
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 0f0ab18d4405..85797390e3dd 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -403,7 +403,7 @@ static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb,
 		offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize;
 	}
 
-	ret = d->mtd->write_oob(d->mtd, offset , &ops);
+	ret = mtd_write_oob(d->mtd, offset, &ops);
 
 	if (ret) {
 		dev_warn(d->dev, "Write OOB failed for block at %08llx "
@@ -946,7 +946,7 @@ static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
 			patt = mtdswap_test_patt(test + i);
 			memset(d->page_buf, patt, mtd->writesize);
 			memset(d->oob_buf, patt, mtd->ecclayout->oobavail);
-			ret = mtd->write_oob(mtd, pos, &ops);
+			ret = mtd_write_oob(mtd, pos, &ops);
 			if (ret)
 				goto error;
 
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index fcab50e80b90..20a112f591fe 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -350,7 +350,7 @@ static int scan_write_bbt(struct mtd_info *mtd, loff_t offs, size_t len,
 	ops.oobbuf = oob;
 	ops.len = len;
 
-	return mtd->write_oob(mtd, offs, &ops);
+	return mtd_write_oob(mtd, offs, &ops);
 }
 
 static u32 bbt_get_ver_offs(struct mtd_info *mtd, struct nand_bbt_descr *td)
diff --git a/drivers/mtd/nand/sm_common.c b/drivers/mtd/nand/sm_common.c
index 32ae5af7444f..774c3c266713 100644
--- a/drivers/mtd/nand/sm_common.c
+++ b/drivers/mtd/nand/sm_common.c
@@ -55,7 +55,7 @@ static int sm_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	ops.datbuf = NULL;
 
 
-	ret = mtd->write_oob(mtd, ofs, &ops);
+	ret = mtd_write_oob(mtd, ofs, &ops);
 	if (ret < 0 || ops.oobretlen != SM_OOB_SIZE) {
 		printk(KERN_NOTICE
 			"sm_common: can't mark sector at %i as bad\n",
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index 7497f5efc26b..8847e60ad167 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -174,7 +174,7 @@ int nftl_write_oob(struct mtd_info *mtd, loff_t offs, size_t len,
 	ops.oobbuf = buf;
 	ops.datbuf = NULL;
 
-	res = mtd->write_oob(mtd, offs & ~mask, &ops);
+	res = mtd_write_oob(mtd, offs & ~mask, &ops);
 	*retlen = ops.oobretlen;
 	return res;
 }
@@ -198,7 +198,7 @@ static int nftl_write(struct mtd_info *mtd, loff_t offs, size_t len,
 	ops.datbuf = buf;
 	ops.len = len;
 
-	res = mtd->write_oob(mtd, offs & ~mask, &ops);
+	res = mtd_write_oob(mtd, offs & ~mask, &ops);
 	*retlen = ops.retlen;
 	return res;
 }
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index 748aa4416691..4ec2af7fb845 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -343,7 +343,7 @@ static int sm_write_sector(struct sm_ftl *ftl,
 	ops.ooblen = SM_OOB_SIZE;
 	ops.oobbuf = (void *)oob;
 
-	ret = mtd->write_oob(mtd, sm_mkoffset(ftl, zone, block, boffset), &ops);
+	ret = mtd_write_oob(mtd, sm_mkoffset(ftl, zone, block, boffset), &ops);
 
 	/* Now we assume that hardware will catch write bitflip errors */
 	/* If you are paranoid, use CONFIG_MTD_NAND_VERIFY_WRITE */
diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c
index 962d27a64e64..81113885e086 100644
--- a/drivers/mtd/tests/mtd_oobtest.c
+++ b/drivers/mtd/tests/mtd_oobtest.c
@@ -139,7 +139,7 @@ static int write_eraseblock(int ebnum)
 		ops.ooboffs   = use_offset;
 		ops.datbuf    = NULL;
 		ops.oobbuf    = writebuf;
-		err = mtd->write_oob(mtd, addr, &ops);
+		err = mtd_write_oob(mtd, addr, &ops);
 		if (err || ops.oobretlen != use_len) {
 			printk(PRINT_PREF "error: writeoob failed at %#llx\n",
 			       (long long)addr);
@@ -524,7 +524,7 @@ static int __init mtd_oobtest_init(void)
 	ops.oobbuf    = writebuf;
 	printk(PRINT_PREF "attempting to start write past end of OOB\n");
 	printk(PRINT_PREF "an error is expected...\n");
-	err = mtd->write_oob(mtd, addr0, &ops);
+	err = mtd_write_oob(mtd, addr0, &ops);
 	if (err) {
 		printk(PRINT_PREF "error occurred as expected\n");
 		err = 0;
@@ -568,7 +568,7 @@ static int __init mtd_oobtest_init(void)
 		ops.oobbuf    = writebuf;
 		printk(PRINT_PREF "attempting to write past end of device\n");
 		printk(PRINT_PREF "an error is expected...\n");
-		err = mtd->write_oob(mtd, mtd->size - mtd->writesize, &ops);
+		err = mtd_write_oob(mtd, mtd->size - mtd->writesize, &ops);
 		if (err) {
 			printk(PRINT_PREF "error occurred as expected\n");
 			err = 0;
@@ -612,7 +612,7 @@ static int __init mtd_oobtest_init(void)
 		ops.oobbuf    = writebuf;
 		printk(PRINT_PREF "attempting to write past end of device\n");
 		printk(PRINT_PREF "an error is expected...\n");
-		err = mtd->write_oob(mtd, mtd->size - mtd->writesize, &ops);
+		err = mtd_write_oob(mtd, mtd->size - mtd->writesize, &ops);
 		if (err) {
 			printk(PRINT_PREF "error occurred as expected\n");
 			err = 0;
@@ -670,7 +670,7 @@ static int __init mtd_oobtest_init(void)
 			ops.ooboffs   = 0;
 			ops.datbuf    = NULL;
 			ops.oobbuf    = writebuf;
-			err = mtd->write_oob(mtd, addr, &ops);
+			err = mtd_write_oob(mtd, addr, &ops);
 			if (err)
 				goto out;
 			if (i % 256 == 0)
diff --git a/drivers/staging/spectra/lld_mtd.c b/drivers/staging/spectra/lld_mtd.c
index ed8e5f067087..4aa48ddf979c 100644
--- a/drivers/staging/spectra/lld_mtd.c
+++ b/drivers/staging/spectra/lld_mtd.c
@@ -411,9 +411,9 @@ u16 mtd_Write_Page_Main_Spare(u8 *write_data, u32 Block,
 		ops.ooblen = BTSIG_BYTES;
 		ops.ooboffs = 0;
 
-		ret = spectra_mtd->write_oob(spectra_mtd,
-					     (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
-					     &ops);
+		ret = mtd_write_oob(spectra_mtd,
+				    (Block * spectra_mtd->erasesize) + (Page * spectra_mtd->writesize),
+				    &ops);
 		if (ret) {
 			printk(KERN_ERR "%s failed %d\n", __func__, ret);
 			return FAIL;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index efc0cb370306..eae5be483682 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1101,7 +1101,7 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c,
 	ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
 	ops.datbuf = NULL;
 
-	ret = c->mtd->write_oob(c->mtd, jeb->offset, &ops);
+	ret = mtd_write_oob(c->mtd, jeb->offset, &ops);
 	if (ret || ops.oobretlen != ops.ooblen) {
 		printk(KERN_ERR "cannot write OOB for EB at %08x, requested %zd"
 				" bytes, read %zd bytes, error %d\n",
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 0db8d87ce451..abbc96ad3b2c 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -190,16 +190,14 @@ struct mtd_info {
 			    size_t *retlen, const u_char *buf);
 	int (*read_oob) (struct mtd_info *mtd, loff_t from,
 			 struct mtd_oob_ops *ops);
+	int (*write_oob) (struct mtd_info *mtd, loff_t to,
+			  struct mtd_oob_ops *ops);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-
-	int (*write_oob) (struct mtd_info *mtd, loff_t to,
-			 struct mtd_oob_ops *ops);
-
 	/*
 	 * Methods to access the protection register area, present in some
 	 * flash devices. The user data is one time programmable but the
@@ -326,6 +324,12 @@ static inline int mtd_read_oob(struct mtd_info *mtd, loff_t from,
 	return mtd->read_oob(mtd, from, ops);
 }
 
+static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to,
+				struct mtd_oob_ops *ops)
+{
+	return mtd->write_oob(mtd, to, ops);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From a750b5ce5e1174ea68f66bf79962c479f7f23998 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:33:28 +0200
Subject: mtd: introduce mtd_get_fact_prot_info interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   |  2 +-
 drivers/mtd/mtdpart.c   |  2 +-
 include/linux/mtd/mtd.h | 19 +++++++++++++------
 3 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 234e3d27143c..4b1772feeafc 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -925,7 +925,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		switch (mfi->mode) {
 		case MTD_FILE_MODE_OTP_FACTORY:
 			if (mtd->get_fact_prot_info)
-				ret = mtd->get_fact_prot_info(mtd, buf, 4096);
+				ret = mtd_get_fact_prot_info(mtd, buf, 4096);
 			break;
 		case MTD_FILE_MODE_OTP_USER:
 			if (mtd->get_user_prot_info)
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 8a46cd2bb78f..6bed8bb3b15d 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -175,7 +175,7 @@ static int part_get_fact_prot_info(struct mtd_info *mtd, struct otp_info *buf,
 		size_t len)
 {
 	struct mtd_part *part = PART(mtd);
-	return part->master->get_fact_prot_info(part->master, buf, len);
+	return mtd_get_fact_prot_info(part->master, buf, len);
 }
 
 static int part_write(struct mtd_info *mtd, loff_t to, size_t len,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index abbc96ad3b2c..9a7a7f2d2296 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -192,18 +192,14 @@ struct mtd_info {
 			 struct mtd_oob_ops *ops);
 	int (*write_oob) (struct mtd_info *mtd, loff_t to,
 			  struct mtd_oob_ops *ops);
+	int (*get_fact_prot_info) (struct mtd_info *mtd, struct otp_info *buf,
+				   size_t len);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	/*
-	 * Methods to access the protection register area, present in some
-	 * flash devices. The user data is one time programmable but the
-	 * factory data is read only.
-	 */
-	int (*get_fact_prot_info) (struct mtd_info *mtd, struct otp_info *buf, size_t len);
 	int (*read_fact_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*get_user_prot_info) (struct mtd_info *mtd, struct otp_info *buf, size_t len);
 	int (*read_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
@@ -330,6 +326,17 @@ static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to,
 	return mtd->write_oob(mtd, to, ops);
 }
 
+/*
+ * Method to access the protection register area, present in some flash
+ * devices. The user data is one time programmable but the factory data is read
+ * only.
+ */
+static inline int mtd_get_fact_prot_info(struct mtd_info *mtd,
+					 struct otp_info *buf, size_t len)
+{
+	return mtd->get_fact_prot_info(mtd, buf, len);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From d264f72ae56245358025109d9d066d159589802d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:40:06 +0200
Subject: mtd: introduce mtd_read_fact_prot_reg interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   |  3 ++-
 drivers/mtd/mtdpart.c   |  3 +--
 include/linux/mtd/mtd.h | 10 +++++++++-
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 4b1772feeafc..6afb05469bbd 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -213,7 +213,8 @@ static ssize_t mtdchar_read(struct file *file, char __user *buf, size_t count,
 
 		switch (mfi->mode) {
 		case MTD_FILE_MODE_OTP_FACTORY:
-			ret = mtd->read_fact_prot_reg(mtd, *ppos, len, &retlen, kbuf);
+			ret = mtd_read_fact_prot_reg(mtd, *ppos, len,
+						     &retlen, kbuf);
 			break;
 		case MTD_FILE_MODE_OTP_USER:
 			ret = mtd->read_user_prot_reg(mtd, *ppos, len, &retlen, kbuf);
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 6bed8bb3b15d..4f2c9137cd49 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -167,8 +167,7 @@ static int part_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len, size_t *retlen, u_char *buf)
 {
 	struct mtd_part *part = PART(mtd);
-	return part->master->read_fact_prot_reg(part->master, from,
-					len, retlen, buf);
+	return mtd_read_fact_prot_reg(part->master, from, len, retlen, buf);
 }
 
 static int part_get_fact_prot_info(struct mtd_info *mtd, struct otp_info *buf,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 9a7a7f2d2296..d77a7f83270f 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -194,13 +194,14 @@ struct mtd_info {
 			  struct mtd_oob_ops *ops);
 	int (*get_fact_prot_info) (struct mtd_info *mtd, struct otp_info *buf,
 				   size_t len);
+	int (*read_fact_prot_reg) (struct mtd_info *mtd, loff_t from,
+				   size_t len, size_t *retlen, u_char *buf);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	int (*read_fact_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*get_user_prot_info) (struct mtd_info *mtd, struct otp_info *buf, size_t len);
 	int (*read_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*write_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
@@ -337,6 +338,13 @@ static inline int mtd_get_fact_prot_info(struct mtd_info *mtd,
 	return mtd->get_fact_prot_info(mtd, buf, len);
 }
 
+static inline int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
+					 size_t len, size_t *retlen,
+					 u_char *buf)
+{
+	return mtd->read_fact_prot_reg(mtd, from, len, retlen, buf);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 855e5d8cfebc21f45c9446a88b61e29d94c03781 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:45:11 +0200
Subject: mtd: introduce mtd_get_user_prot_info interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   |  2 +-
 drivers/mtd/mtdpart.c   |  2 +-
 include/linux/mtd/mtd.h | 10 +++++++++-
 3 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 6afb05469bbd..002a8b5428cc 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -930,7 +930,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 			break;
 		case MTD_FILE_MODE_OTP_USER:
 			if (mtd->get_user_prot_info)
-				ret = mtd->get_user_prot_info(mtd, buf, 4096);
+				ret = mtd_get_user_prot_info(mtd, buf, 4096);
 			break;
 		default:
 			break;
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 4f2c9137cd49..bf1ab56afb8e 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -160,7 +160,7 @@ static int part_get_user_prot_info(struct mtd_info *mtd,
 		struct otp_info *buf, size_t len)
 {
 	struct mtd_part *part = PART(mtd);
-	return part->master->get_user_prot_info(part->master, buf, len);
+	return mtd_get_user_prot_info(part->master, buf, len);
 }
 
 static int part_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index d77a7f83270f..ff0a3a18f397 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -196,13 +196,14 @@ struct mtd_info {
 				   size_t len);
 	int (*read_fact_prot_reg) (struct mtd_info *mtd, loff_t from,
 				   size_t len, size_t *retlen, u_char *buf);
+	int (*get_user_prot_info) (struct mtd_info *mtd, struct otp_info *buf,
+				   size_t len);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	int (*get_user_prot_info) (struct mtd_info *mtd, struct otp_info *buf, size_t len);
 	int (*read_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*write_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*lock_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len);
@@ -345,6 +346,13 @@ static inline int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
 	return mtd->read_fact_prot_reg(mtd, from, len, retlen, buf);
 }
 
+static inline int mtd_get_user_prot_info(struct mtd_info *mtd,
+					 struct otp_info *buf,
+					 size_t len)
+{
+	return mtd->get_user_prot_info(mtd, buf, len);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 4ea1cabb926f03a8dbd6e3f064538d9a290ee9fd Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:47:59 +0200
Subject: mtd: introduce mtd_read_user_prot_reg interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   |  3 ++-
 drivers/mtd/mtdpart.c   |  3 +--
 include/linux/mtd/mtd.h | 10 +++++++++-
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 002a8b5428cc..6aa3fb4a0292 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -217,7 +217,8 @@ static ssize_t mtdchar_read(struct file *file, char __user *buf, size_t count,
 						     &retlen, kbuf);
 			break;
 		case MTD_FILE_MODE_OTP_USER:
-			ret = mtd->read_user_prot_reg(mtd, *ppos, len, &retlen, kbuf);
+			ret = mtd_read_user_prot_reg(mtd, *ppos, len,
+						     &retlen, kbuf);
 			break;
 		case MTD_FILE_MODE_RAW:
 		{
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index bf1ab56afb8e..f018373ef3b4 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -152,8 +152,7 @@ static int part_read_user_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len, size_t *retlen, u_char *buf)
 {
 	struct mtd_part *part = PART(mtd);
-	return part->master->read_user_prot_reg(part->master, from,
-					len, retlen, buf);
+	return mtd_read_user_prot_reg(part->master, from, len, retlen, buf);
 }
 
 static int part_get_user_prot_info(struct mtd_info *mtd,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index ff0a3a18f397..855fb7fab697 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -198,13 +198,14 @@ struct mtd_info {
 				   size_t len, size_t *retlen, u_char *buf);
 	int (*get_user_prot_info) (struct mtd_info *mtd, struct otp_info *buf,
 				   size_t len);
+	int (*read_user_prot_reg) (struct mtd_info *mtd, loff_t from,
+				   size_t len, size_t *retlen, u_char *buf);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	int (*read_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*write_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*lock_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len);
 
@@ -353,6 +354,13 @@ static inline int mtd_get_user_prot_info(struct mtd_info *mtd,
 	return mtd->get_user_prot_info(mtd, buf, len);
 }
 
+static inline int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from,
+					 size_t len, size_t *retlen,
+					 u_char *buf)
+{
+	return mtd->read_user_prot_reg(mtd, from, len, retlen, buf);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 482b43adbb7b124316ec72c161b0d1655e759368 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:50:04 +0200
Subject: mtd: introduce mtd_write_user_prot_reg interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   |  3 ++-
 drivers/mtd/mtdpart.c   |  3 +--
 include/linux/mtd/mtd.h | 10 +++++++++-
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 6aa3fb4a0292..d8881707ca60 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -314,7 +314,8 @@ static ssize_t mtdchar_write(struct file *file, const char __user *buf, size_t c
 				ret = -EOPNOTSUPP;
 				break;
 			}
-			ret = mtd->write_user_prot_reg(mtd, *ppos, len, &retlen, kbuf);
+			ret = mtd_write_user_prot_reg(mtd, *ppos, len,
+						      &retlen, kbuf);
 			break;
 
 		case MTD_FILE_MODE_RAW:
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index f018373ef3b4..1e7b8d1693aa 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -222,8 +222,7 @@ static int part_write_user_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len, size_t *retlen, u_char *buf)
 {
 	struct mtd_part *part = PART(mtd);
-	return part->master->write_user_prot_reg(part->master, from,
-					len, retlen, buf);
+	return mtd_write_user_prot_reg(part->master, from, len, retlen, buf);
 }
 
 static int part_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 855fb7fab697..554960793e37 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -200,13 +200,14 @@ struct mtd_info {
 				   size_t len);
 	int (*read_user_prot_reg) (struct mtd_info *mtd, loff_t from,
 				   size_t len, size_t *retlen, u_char *buf);
+	int (*write_user_prot_reg) (struct mtd_info *mtd, loff_t to, size_t len,
+				    size_t *retlen, u_char *buf);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	int (*write_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf);
 	int (*lock_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len);
 
 	/* kvec-based read/write methods.
@@ -361,6 +362,13 @@ static inline int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from,
 	return mtd->read_user_prot_reg(mtd, from, len, retlen, buf);
 }
 
+static inline int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to,
+					  size_t len, size_t *retlen,
+					  u_char *buf)
+{
+	return mtd->write_user_prot_reg(mtd, to, len, retlen, buf);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 4403dbfb4541d34e5db33db709094d57d09f7467 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:55:49 +0200
Subject: mtd: introduce mtd_lock_user_prot_reg interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   |  2 +-
 drivers/mtd/mtdpart.c   |  2 +-
 include/linux/mtd/mtd.h | 10 ++++++++--
 3 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index d8881707ca60..86308acb40e0 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -960,7 +960,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 			return -EFAULT;
 		if (!mtd->lock_user_prot_reg)
 			return -EOPNOTSUPP;
-		ret = mtd->lock_user_prot_reg(mtd, oinfo.start, oinfo.length);
+		ret = mtd_lock_user_prot_reg(mtd, oinfo.start, oinfo.length);
 		break;
 	}
 #endif
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 1e7b8d1693aa..0bb16d6ed08a 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -229,7 +229,7 @@ static int part_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 		size_t len)
 {
 	struct mtd_part *part = PART(mtd);
-	return part->master->lock_user_prot_reg(part->master, from, len);
+	return mtd_lock_user_prot_reg(part->master, from, len);
 }
 
 static int part_writev(struct mtd_info *mtd, const struct kvec *vecs,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 554960793e37..b58e5e8746ec 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -202,14 +202,14 @@ struct mtd_info {
 				   size_t len, size_t *retlen, u_char *buf);
 	int (*write_user_prot_reg) (struct mtd_info *mtd, loff_t to, size_t len,
 				    size_t *retlen, u_char *buf);
+	int (*lock_user_prot_reg) (struct mtd_info *mtd, loff_t from,
+				   size_t len);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	int (*lock_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len);
-
 	/* kvec-based read/write methods.
 	   NB: The 'count' parameter is the number of _vectors_, each of
 	   which contains an (ofs, len) tuple.
@@ -369,6 +369,12 @@ static inline int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to,
 	return mtd->write_user_prot_reg(mtd, to, len, retlen, buf);
 }
 
+static inline int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
+					 size_t len)
+{
+	return mtd->lock_user_prot_reg(mtd, from, len);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From b0a31f7b2a668f00a8d0546dfeed65fac871b2da Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 18:59:12 +0200
Subject: mtd: introduce mtd_writev interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdconcat.c |  5 +++--
 drivers/mtd/mtdpart.c   |  4 ++--
 fs/jffs2/writev.c       |  2 +-
 include/linux/mtd/mtd.h | 18 ++++++++++++------
 4 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 3d9c1ffdbbbf..6fdae191e1ba 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -227,8 +227,9 @@ concat_writev(struct mtd_info *mtd, const struct kvec *vecs,
 		if (!(subdev->flags & MTD_WRITEABLE))
 			err = -EROFS;
 		else
-			err = subdev->writev(subdev, &vecs_copy[entry_low],
-				entry_high - entry_low + 1, to, &retsize);
+			err = mtd_writev(subdev, &vecs_copy[entry_low],
+					 entry_high - entry_low + 1, to,
+					 &retsize);
 
 		vecs_copy[entry_high].iov_len = old_iov_len - size;
 		vecs_copy[entry_high].iov_base += size;
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 0bb16d6ed08a..c0bfa88c82f3 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -238,8 +238,8 @@ static int part_writev(struct mtd_info *mtd, const struct kvec *vecs,
 	struct mtd_part *part = PART(mtd);
 	if (!(mtd->flags & MTD_WRITEABLE))
 		return -EROFS;
-	return part->master->writev(part->master, vecs, count,
-					to + part->offset, retlen);
+	return mtd_writev(part->master, vecs, count, to + part->offset,
+			  retlen);
 }
 
 static int part_erase(struct mtd_info *mtd, struct erase_info *instr)
diff --git a/fs/jffs2/writev.c b/fs/jffs2/writev.c
index b05710fd552a..d0ef068709ad 100644
--- a/fs/jffs2/writev.c
+++ b/fs/jffs2/writev.c
@@ -52,7 +52,7 @@ int jffs2_flash_direct_writev(struct jffs2_sb_info *c, const struct kvec *vecs,
 	}
 
 	if (c->mtd->writev)
-		return c->mtd->writev(c->mtd, vecs, count, to, retlen);
+		return mtd_writev(c->mtd, vecs, count, to, retlen);
 	else {
 		return mtd_fake_writev(c->mtd, vecs, count, to, retlen);
 	}
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index b58e5e8746ec..4129cb5c3de4 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -204,18 +204,14 @@ struct mtd_info {
 				    size_t *retlen, u_char *buf);
 	int (*lock_user_prot_reg) (struct mtd_info *mtd, loff_t from,
 				   size_t len);
+	int (*writev) (struct mtd_info *mtd, const struct kvec *vecs,
+			unsigned long count, loff_t to, size_t *retlen);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	/* kvec-based read/write methods.
-	   NB: The 'count' parameter is the number of _vectors_, each of
-	   which contains an (ofs, len) tuple.
-	*/
-	int (*writev) (struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen);
-
 	/* Sync */
 	void (*sync) (struct mtd_info *mtd);
 
@@ -375,6 +371,16 @@ static inline int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 	return mtd->lock_user_prot_reg(mtd, from, len);
 }
 
+/*
+ * kvec-based read/write method. NB: The 'count' parameter is the number of
+ * _vectors_, each of which contains an (ofs, len) tuple.
+ */
+static inline int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
+			     unsigned long count, loff_t to, size_t *retlen)
+{
+	return mtd->writev(mtd, vecs, count, to, retlen);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 85f2f2a809d658c15b574df02ede92090f45a1f2 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:03:12 +0200
Subject: mtd: introduce mtd_sync interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/ftl.c       | 2 +-
 drivers/mtd/mtdblock.c  | 4 ++--
 drivers/mtd/mtdchar.c   | 2 +-
 drivers/mtd/mtdconcat.c | 2 +-
 drivers/mtd/mtdpart.c   | 2 +-
 drivers/mtd/mtdswap.c   | 2 +-
 drivers/mtd/rfd_ftl.c   | 2 +-
 drivers/mtd/ubi/kapi.c  | 2 +-
 fs/jffs2/super.c        | 2 +-
 fs/logfs/dev_mtd.c      | 2 +-
 include/linux/mtd/mtd.h | 9 ++++++---
 11 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index d591b1d0a6c1..c9c90299c9e2 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -651,7 +651,7 @@ static int reclaim_block(partition_t *part)
 		pr_debug("ftl_cs: waiting for transfer "
 		      "unit to be prepared...\n");
 		if (part->mbd.mtd->sync)
-			part->mbd.mtd->sync(part->mbd.mtd);
+			mtd_sync(part->mbd.mtd);
 	    } else {
 		static int ne = 0;
 		if (++ne < 5)
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index ac7f1f1faa2d..496e1a6e8029 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -323,7 +323,7 @@ static int mtdblock_release(struct mtd_blktrans_dev *mbd)
 	if (!--mtdblk->count) {
 		/* It was the last usage. Free the cache */
 		if (mbd->mtd->sync)
-			mbd->mtd->sync(mbd->mtd);
+			mtd_sync(mbd->mtd);
 		vfree(mtdblk->cache_data);
 	}
 
@@ -343,7 +343,7 @@ static int mtdblock_flush(struct mtd_blktrans_dev *dev)
 	mutex_unlock(&mtdblk->cache_mutex);
 
 	if (dev->mtd->sync)
-		dev->mtd->sync(dev->mtd);
+		mtd_sync(dev->mtd);
 	return 0;
 }
 
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 86308acb40e0..b5722ecf19d3 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -155,7 +155,7 @@ static int mtdchar_close(struct inode *inode, struct file *file)
 
 	/* Only sync if opened RW */
 	if ((file->f_mode & FMODE_WRITE) && mtd->sync)
-		mtd->sync(mtd);
+		mtd_sync(mtd);
 
 	iput(mfi->ino);
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 6fdae191e1ba..cc2336edfe28 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -620,7 +620,7 @@ static void concat_sync(struct mtd_info *mtd)
 
 	for (i = 0; i < concat->num_subdev; i++) {
 		struct mtd_info *subdev = concat->subdev[i];
-		subdev->sync(subdev);
+		mtd_sync(subdev);
 	}
 }
 
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index c0bfa88c82f3..2b545052795e 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -301,7 +301,7 @@ static int part_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 static void part_sync(struct mtd_info *mtd)
 {
 	struct mtd_part *part = PART(mtd);
-	part->master->sync(part->master);
+	mtd_sync(part->master);
 }
 
 static int part_suspend(struct mtd_info *mtd)
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 85797390e3dd..cb794e761012 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -1048,7 +1048,7 @@ static int mtdswap_flush(struct mtd_blktrans_dev *dev)
 	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
 
 	if (d->mtd->sync)
-		d->mtd->sync(d->mtd);
+		mtd_sync(d->mtd);
 	return 0;
 }
 
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index c594bb7abfa3..5426d42cdea7 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -449,7 +449,7 @@ static int reclaim_block(struct partition *part, u_long *old_sector)
 
 	/* we have a race if sync doesn't exist */
 	if (part->mbd.mtd->sync)
-		part->mbd.mtd->sync(part->mbd.mtd);
+		mtd_sync(part->mbd.mtd);
 
 	score = 0x7fffffff; /* MAX_INT */
 	best_block = -1;
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index 1a35fc5e3b40..9f265cc1a0d3 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -715,7 +715,7 @@ int ubi_sync(int ubi_num)
 		return -ENODEV;
 
 	if (ubi->mtd->sync)
-		ubi->mtd->sync(ubi->mtd);
+		mtd_sync(ubi->mtd);
 
 	ubi_put_device(ubi);
 	return 0;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index e7e974454115..e78bf3cd1b73 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -337,7 +337,7 @@ static void jffs2_put_super (struct super_block *sb)
 	kfree(c->inocache_list);
 	jffs2_clear_xattr_subsystem(c);
 	if (c->mtd->sync)
-		c->mtd->sync(c->mtd);
+		mtd_sync(c->mtd);
 
 	D1(printk(KERN_DEBUG "jffs2_put_super returning\n"));
 }
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 1842440d6564..0ca7a07db6c1 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -120,7 +120,7 @@ static void logfs_mtd_sync(struct super_block *sb)
 	struct mtd_info *mtd = logfs_super(sb)->s_mtd;
 
 	if (mtd->sync)
-		mtd->sync(mtd);
+		mtd_sync(mtd);
 }
 
 static int logfs_mtd_readpage(void *_sb, struct page *page)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 4129cb5c3de4..47ea19c1e523 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -206,15 +206,13 @@ struct mtd_info {
 				   size_t len);
 	int (*writev) (struct mtd_info *mtd, const struct kvec *vecs,
 			unsigned long count, loff_t to, size_t *retlen);
+	void (*sync) (struct mtd_info *mtd);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	/* Sync */
-	void (*sync) (struct mtd_info *mtd);
-
 	/* Chip-supported device locking */
 	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
@@ -381,6 +379,11 @@ static inline int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 	return mtd->writev(mtd, vecs, count, to, retlen);
 }
 
+static inline void mtd_sync(struct mtd_info *mtd)
+{
+	mtd->sync(mtd);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 7799f9ac8d8ff2db14736950275249df442baeac Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:15:39 +0200
Subject: mtd: introduce mtd_lock interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/scb2_flash.c | 2 +-
 drivers/mtd/mtdchar.c         | 2 +-
 drivers/mtd/mtdconcat.c       | 2 +-
 drivers/mtd/mtdpart.c         | 2 +-
 include/linux/mtd/mtd.h       | 9 +++++++--
 5 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/maps/scb2_flash.c b/drivers/mtd/maps/scb2_flash.c
index d88c8426bb0f..01af34778de3 100644
--- a/drivers/mtd/maps/scb2_flash.c
+++ b/drivers/mtd/maps/scb2_flash.c
@@ -205,7 +205,7 @@ scb2_flash_remove(struct pci_dev *dev)
 
 	/* disable flash writes */
 	if (scb2_mtd->lock)
-		scb2_mtd->lock(scb2_mtd, 0, scb2_mtd->size);
+		mtd_lock(scb2_mtd, 0, scb2_mtd->size);
 
 	mtd_device_unregister(scb2_mtd);
 	map_destroy(scb2_mtd);
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index b5722ecf19d3..870f2cb415cb 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -824,7 +824,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (!mtd->lock)
 			ret = -EOPNOTSUPP;
 		else
-			ret = mtd->lock(mtd, einfo.start, einfo.length);
+			ret = mtd_lock(mtd, einfo.start, einfo.length);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index cc2336edfe28..97d6360986c8 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -556,7 +556,7 @@ static int concat_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 			size = len;
 
 		if (subdev->lock) {
-			err = subdev->lock(subdev, ofs, size);
+			err = mtd_lock(subdev, ofs, size);
 			if (err)
 				break;
 		} else
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 2b545052795e..a5e7a2103dcf 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -279,7 +279,7 @@ static int part_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	struct mtd_part *part = PART(mtd);
 	if ((len + ofs) > mtd->size)
 		return -EINVAL;
-	return part->master->lock(part->master, ofs + part->offset, len);
+	return mtd_lock(part->master, ofs + part->offset, len);
 }
 
 static int part_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 47ea19c1e523..167bac2e380e 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -207,14 +207,13 @@ struct mtd_info {
 	int (*writev) (struct mtd_info *mtd, const struct kvec *vecs,
 			unsigned long count, loff_t to, size_t *retlen);
 	void (*sync) (struct mtd_info *mtd);
+	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	/* Chip-supported device locking */
-	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 
@@ -384,6 +383,12 @@ static inline void mtd_sync(struct mtd_info *mtd)
 	mtd->sync(mtd);
 }
 
+/* Chip-supported device locking */
+static inline int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	return mtd->lock(mtd, ofs, len);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From b66005cd3e6f104e0a1b6492110c337269b53ec3 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:18:22 +0200
Subject: mtd: introduce mtd_unlock interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 2 +-
 drivers/mtd/mtdconcat.c | 2 +-
 drivers/mtd/mtdcore.c   | 2 +-
 drivers/mtd/mtdpart.c   | 2 +-
 include/linux/mtd/mtd.h | 7 ++++++-
 5 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 870f2cb415cb..fe09cd2a4540 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -838,7 +838,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (!mtd->unlock)
 			ret = -EOPNOTSUPP;
 		else
-			ret = mtd->unlock(mtd, einfo.start, einfo.length);
+			ret = mtd_unlock(mtd, einfo.start, einfo.length);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 97d6360986c8..272ebc01f95b 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -596,7 +596,7 @@ static int concat_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 			size = len;
 
 		if (subdev->unlock) {
-			err = subdev->unlock(subdev, ofs, size);
+			err = mtd_unlock(subdev, ofs, size);
 			if (err)
 				break;
 		} else
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index e36191ab47c3..4a2155748fa3 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -340,7 +340,7 @@ int add_mtd_device(struct mtd_info *mtd)
 	/* Some chips always power up locked. Unlock them now */
 	if ((mtd->flags & MTD_WRITEABLE)
 	    && (mtd->flags & MTD_POWERUP_LOCK) && mtd->unlock) {
-		if (mtd->unlock(mtd, 0, mtd->size))
+		if (mtd_unlock(mtd, 0, mtd->size))
 			printk(KERN_WARNING
 			       "%s: unlock failed, writes may not work\n",
 			       mtd->name);
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index a5e7a2103dcf..d65af3752331 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -287,7 +287,7 @@ static int part_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	struct mtd_part *part = PART(mtd);
 	if ((len + ofs) > mtd->size)
 		return -EINVAL;
-	return part->master->unlock(part->master, ofs + part->offset, len);
+	return mtd_unlock(part->master, ofs + part->offset, len);
 }
 
 static int part_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 167bac2e380e..f30c35886f7c 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -208,13 +208,13 @@ struct mtd_info {
 			unsigned long count, loff_t to, size_t *retlen);
 	void (*sync) (struct mtd_info *mtd);
 	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
+	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 
 	/* Power Management functions */
@@ -389,6 +389,11 @@ static inline int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	return mtd->lock(mtd, ofs, len);
 }
 
+static inline int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	return mtd->unlock(mtd, ofs, len);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From e95e9786455c11c8eac30d76e5289d4e40187f9a Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:21:16 +0200
Subject: mtd: introduce mtd_is_locked interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 2 +-
 drivers/mtd/mtdpart.c   | 2 +-
 include/linux/mtd/mtd.h | 8 ++++++--
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index fe09cd2a4540..6d598b23cf3a 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -852,7 +852,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (!mtd->is_locked)
 			ret = -EOPNOTSUPP;
 		else
-			ret = mtd->is_locked(mtd, einfo.start, einfo.length);
+			ret = mtd_is_locked(mtd, einfo.start, einfo.length);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index d65af3752331..ad487fcd423f 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -295,7 +295,7 @@ static int part_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	struct mtd_part *part = PART(mtd);
 	if ((len + ofs) > mtd->size)
 		return -EINVAL;
-	return part->master->is_locked(part->master, ofs + part->offset, len);
+	return mtd_is_locked(part->master, ofs + part->offset, len);
 }
 
 static void part_sync(struct mtd_info *mtd)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index f30c35886f7c..8b9901986c86 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -209,14 +209,13 @@ struct mtd_info {
 	void (*sync) (struct mtd_info *mtd);
 	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
+	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
-
 	/* Power Management functions */
 	int (*suspend) (struct mtd_info *mtd);
 	void (*resume) (struct mtd_info *mtd);
@@ -394,6 +393,11 @@ static inline int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	return mtd->unlock(mtd, ofs, len);
 }
 
+static inline int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	return mtd->is_locked(mtd, ofs, len);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 3fe4bae88460869a8e553397cd9057a4ee7ca341 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:25:16 +0200
Subject: mtd: introduce mtd_suspend interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/physmap.c        | 2 +-
 drivers/mtd/maps/pxa2xx-flash.c   | 2 +-
 drivers/mtd/maps/rbtx4939-flash.c | 2 +-
 drivers/mtd/maps/sa1100-flash.c   | 2 +-
 drivers/mtd/mtdconcat.c           | 2 +-
 drivers/mtd/mtdcore.c             | 2 +-
 drivers/mtd/mtdpart.c             | 2 +-
 drivers/mtd/nand/nomadik_nand.c   | 2 +-
 drivers/mtd/nand/pxa3xx_nand.c    | 2 +-
 include/linux/mtd/mtd.h           | 7 ++++++-
 10 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index 1f749d58ae6f..b7f0cd14ae2b 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c
@@ -191,7 +191,7 @@ static void physmap_flash_shutdown(struct platform_device *dev)
 
 	for (i = 0; i < MAX_RESOURCES && info->mtd[i]; i++)
 		if (info->mtd[i]->suspend && info->mtd[i]->resume)
-			if (info->mtd[i]->suspend(info->mtd[i]) == 0)
+			if (mtd_suspend(info->mtd[i]) == 0)
 				info->mtd[i]->resume(info->mtd[i]);
 }
 #else
diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
index 274e39914332..9cb427320c04 100644
--- a/drivers/mtd/maps/pxa2xx-flash.c
+++ b/drivers/mtd/maps/pxa2xx-flash.c
@@ -125,7 +125,7 @@ static void pxa2xx_flash_shutdown(struct platform_device *dev)
 {
 	struct pxa2xx_flash_info *info = platform_get_drvdata(dev);
 
-	if (info && info->mtd->suspend(info->mtd) == 0)
+	if (info && mtd_suspend(info->mtd) == 0)
 		info->mtd->resume(info->mtd);
 }
 #else
diff --git a/drivers/mtd/maps/rbtx4939-flash.c b/drivers/mtd/maps/rbtx4939-flash.c
index bb7d2042affa..5856aa2d99f7 100644
--- a/drivers/mtd/maps/rbtx4939-flash.c
+++ b/drivers/mtd/maps/rbtx4939-flash.c
@@ -120,7 +120,7 @@ static void rbtx4939_flash_shutdown(struct platform_device *dev)
 	struct rbtx4939_flash_info *info = platform_get_drvdata(dev);
 
 	if (info->mtd->suspend && info->mtd->resume)
-		if (info->mtd->suspend(info->mtd) == 0)
+		if (mtd_suspend(info->mtd) == 0)
 			info->mtd->resume(info->mtd);
 }
 #else
diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c
index ac3a290748cd..20944f054867 100644
--- a/drivers/mtd/maps/sa1100-flash.c
+++ b/drivers/mtd/maps/sa1100-flash.c
@@ -377,7 +377,7 @@ static int __exit sa1100_mtd_remove(struct platform_device *pdev)
 static void sa1100_mtd_shutdown(struct platform_device *dev)
 {
 	struct sa_info *info = platform_get_drvdata(dev);
-	if (info && info->mtd->suspend(info->mtd) == 0)
+	if (info && mtd_suspend(info->mtd) == 0)
 		info->mtd->resume(info->mtd);
 }
 #else
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 272ebc01f95b..36bb1c99925b 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -631,7 +631,7 @@ static int concat_suspend(struct mtd_info *mtd)
 
 	for (i = 0; i < concat->num_subdev; i++) {
 		struct mtd_info *subdev = concat->subdev[i];
-		if ((rc = subdev->suspend(subdev)) < 0)
+		if ((rc = mtd_suspend(subdev)) < 0)
 			return rc;
 	}
 	return rc;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 4a2155748fa3..0db455d31148 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -119,7 +119,7 @@ static int mtd_cls_suspend(struct device *dev, pm_message_t state)
 	struct mtd_info *mtd = dev_to_mtd(dev);
 
 	if (mtd && mtd->suspend)
-		return mtd->suspend(mtd);
+		return mtd_suspend(mtd);
 	else
 		return 0;
 }
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index ad487fcd423f..c5e556a92641 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -307,7 +307,7 @@ static void part_sync(struct mtd_info *mtd)
 static int part_suspend(struct mtd_info *mtd)
 {
 	struct mtd_part *part = PART(mtd);
-	return part->master->suspend(part->master);
+	return mtd_suspend(part->master);
 }
 
 static void part_resume(struct mtd_info *mtd)
diff --git a/drivers/mtd/nand/nomadik_nand.c b/drivers/mtd/nand/nomadik_nand.c
index 673dc6c68f9a..9461babdb308 100644
--- a/drivers/mtd/nand/nomadik_nand.c
+++ b/drivers/mtd/nand/nomadik_nand.c
@@ -201,7 +201,7 @@ static int nomadik_nand_suspend(struct device *dev)
 	struct nomadik_nand_host *host = dev_get_drvdata(dev);
 	int ret = 0;
 	if (host)
-		ret = host->mtd.suspend(&host->mtd);
+		ret = mtd_suspend(&host->mtd);
 	return ret;
 }
 
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 90d60169ae40..7a028cf1206e 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -1258,7 +1258,7 @@ static int pxa3xx_nand_suspend(struct platform_device *pdev, pm_message_t state)
 
 	for (cs = 0; cs < pdata->num_cs; cs++) {
 		mtd = info->host[cs]->mtd;
-		mtd->suspend(mtd);
+		mtd_suspend(mtd);
 	}
 
 	return 0;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 8b9901986c86..8e01bad44e25 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -210,6 +210,7 @@ struct mtd_info {
 	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
+	int (*suspend) (struct mtd_info *mtd);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
@@ -217,7 +218,6 @@ struct mtd_info {
 	struct backing_dev_info *backing_dev_info;
 
 	/* Power Management functions */
-	int (*suspend) (struct mtd_info *mtd);
 	void (*resume) (struct mtd_info *mtd);
 
 	/* Bad block management functions */
@@ -398,6 +398,11 @@ static inline int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	return mtd->is_locked(mtd, ofs, len);
 }
 
+static inline int mtd_suspend(struct mtd_info *mtd)
+{
+	return mtd->suspend(mtd);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From ead995f8d4da1e2f1ef40b0e5f4133fee38a3d3d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:31:25 +0200
Subject: mtd: introduce mtd_resume interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/physmap.c        | 2 +-
 drivers/mtd/maps/pxa2xx-flash.c   | 2 +-
 drivers/mtd/maps/rbtx4939-flash.c | 2 +-
 drivers/mtd/maps/sa1100-flash.c   | 2 +-
 drivers/mtd/mtdconcat.c           | 2 +-
 drivers/mtd/mtdcore.c             | 2 +-
 drivers/mtd/mtdpart.c             | 2 +-
 drivers/mtd/nand/nomadik_nand.c   | 2 +-
 drivers/mtd/nand/pxa3xx_nand.c    | 2 +-
 include/linux/mtd/mtd.h           | 9 ++++++---
 10 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index b7f0cd14ae2b..d94cc62186c1 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c
@@ -192,7 +192,7 @@ static void physmap_flash_shutdown(struct platform_device *dev)
 	for (i = 0; i < MAX_RESOURCES && info->mtd[i]; i++)
 		if (info->mtd[i]->suspend && info->mtd[i]->resume)
 			if (mtd_suspend(info->mtd[i]) == 0)
-				info->mtd[i]->resume(info->mtd[i]);
+				mtd_resume(info->mtd[i]);
 }
 #else
 #define physmap_flash_shutdown NULL
diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
index 9cb427320c04..436d121185b1 100644
--- a/drivers/mtd/maps/pxa2xx-flash.c
+++ b/drivers/mtd/maps/pxa2xx-flash.c
@@ -126,7 +126,7 @@ static void pxa2xx_flash_shutdown(struct platform_device *dev)
 	struct pxa2xx_flash_info *info = platform_get_drvdata(dev);
 
 	if (info && mtd_suspend(info->mtd) == 0)
-		info->mtd->resume(info->mtd);
+		mtd_resume(info->mtd);
 }
 #else
 #define pxa2xx_flash_shutdown NULL
diff --git a/drivers/mtd/maps/rbtx4939-flash.c b/drivers/mtd/maps/rbtx4939-flash.c
index 5856aa2d99f7..717628312040 100644
--- a/drivers/mtd/maps/rbtx4939-flash.c
+++ b/drivers/mtd/maps/rbtx4939-flash.c
@@ -121,7 +121,7 @@ static void rbtx4939_flash_shutdown(struct platform_device *dev)
 
 	if (info->mtd->suspend && info->mtd->resume)
 		if (mtd_suspend(info->mtd) == 0)
-			info->mtd->resume(info->mtd);
+			mtd_resume(info->mtd);
 }
 #else
 #define rbtx4939_flash_shutdown NULL
diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c
index 20944f054867..502821997707 100644
--- a/drivers/mtd/maps/sa1100-flash.c
+++ b/drivers/mtd/maps/sa1100-flash.c
@@ -378,7 +378,7 @@ static void sa1100_mtd_shutdown(struct platform_device *dev)
 {
 	struct sa_info *info = platform_get_drvdata(dev);
 	if (info && mtd_suspend(info->mtd) == 0)
-		info->mtd->resume(info->mtd);
+		mtd_resume(info->mtd);
 }
 #else
 #define sa1100_mtd_shutdown NULL
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 36bb1c99925b..4b7f825ce015 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -644,7 +644,7 @@ static void concat_resume(struct mtd_info *mtd)
 
 	for (i = 0; i < concat->num_subdev; i++) {
 		struct mtd_info *subdev = concat->subdev[i];
-		subdev->resume(subdev);
+		mtd_resume(subdev);
 	}
 }
 
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 0db455d31148..376fbfdb09aa 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -129,7 +129,7 @@ static int mtd_cls_resume(struct device *dev)
 	struct mtd_info *mtd = dev_to_mtd(dev);
 	
 	if (mtd && mtd->resume)
-		mtd->resume(mtd);
+		mtd_resume(mtd);
 	return 0;
 }
 
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index c5e556a92641..8610750852ac 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -313,7 +313,7 @@ static int part_suspend(struct mtd_info *mtd)
 static void part_resume(struct mtd_info *mtd)
 {
 	struct mtd_part *part = PART(mtd);
-	part->master->resume(part->master);
+	mtd_resume(part->master);
 }
 
 static int part_block_isbad(struct mtd_info *mtd, loff_t ofs)
diff --git a/drivers/mtd/nand/nomadik_nand.c b/drivers/mtd/nand/nomadik_nand.c
index 9461babdb308..a86aa812ca13 100644
--- a/drivers/mtd/nand/nomadik_nand.c
+++ b/drivers/mtd/nand/nomadik_nand.c
@@ -209,7 +209,7 @@ static int nomadik_nand_resume(struct device *dev)
 {
 	struct nomadik_nand_host *host = dev_get_drvdata(dev);
 	if (host)
-		host->mtd.resume(&host->mtd);
+		mtd_resume(&host->mtd);
 	return 0;
 }
 
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 7a028cf1206e..8544d6bf50a0 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -1291,7 +1291,7 @@ static int pxa3xx_nand_resume(struct platform_device *pdev)
 	nand_writel(info, NDSR, NDSR_MASK);
 	for (cs = 0; cs < pdata->num_cs; cs++) {
 		mtd = info->host[cs]->mtd;
-		mtd->resume(mtd);
+		mtd_resume(mtd);
 	}
 
 	return 0;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 8e01bad44e25..d6b4aa177505 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -211,15 +211,13 @@ struct mtd_info {
 	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*suspend) (struct mtd_info *mtd);
+	void (*resume) (struct mtd_info *mtd);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	/* Power Management functions */
-	void (*resume) (struct mtd_info *mtd);
-
 	/* Bad block management functions */
 	int (*block_isbad) (struct mtd_info *mtd, loff_t ofs);
 	int (*block_markbad) (struct mtd_info *mtd, loff_t ofs);
@@ -403,6 +401,11 @@ static inline int mtd_suspend(struct mtd_info *mtd)
 	return mtd->suspend(mtd);
 }
 
+static inline void mtd_resume(struct mtd_info *mtd)
+{
+	mtd->resume(mtd);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 7086c19d07429d697057587caf1e5e0345442d16 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:35:30 +0200
Subject: mtd: introduce mtd_block_isbad interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 arch/cris/arch-v32/drivers/axisflashmap.c | 3 +--
 drivers/mtd/inftlmount.c                  | 3 ++-
 drivers/mtd/mtdchar.c                     | 2 +-
 drivers/mtd/mtdconcat.c                   | 2 +-
 drivers/mtd/mtdoops.c                     | 4 ++--
 drivers/mtd/mtdpart.c                     | 5 ++---
 drivers/mtd/mtdswap.c                     | 4 ++--
 drivers/mtd/nftlmount.c                   | 3 ++-
 drivers/mtd/redboot.c                     | 4 ++--
 drivers/mtd/ssfdc.c                       | 4 ++--
 drivers/mtd/tests/mtd_oobtest.c           | 2 +-
 drivers/mtd/tests/mtd_pagetest.c          | 2 +-
 drivers/mtd/tests/mtd_readtest.c          | 2 +-
 drivers/mtd/tests/mtd_speedtest.c         | 2 +-
 drivers/mtd/tests/mtd_stresstest.c        | 2 +-
 drivers/mtd/tests/mtd_subpagetest.c       | 2 +-
 drivers/mtd/tests/mtd_torturetest.c       | 3 +--
 drivers/mtd/ubi/io.c                      | 2 +-
 fs/jffs2/scan.c                           | 2 +-
 fs/logfs/dev_mtd.c                        | 4 ++--
 include/linux/mtd/mtd.h                   | 7 ++++++-
 21 files changed, 34 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c
index 011bddbf073f..b34438e026be 100644
--- a/arch/cris/arch-v32/drivers/axisflashmap.c
+++ b/arch/cris/arch-v32/drivers/axisflashmap.c
@@ -404,8 +404,7 @@ static int __init init_axis_flash(void)
 		 */
 		int blockstat;
 		do {
-			blockstat = main_mtd->block_isbad(main_mtd,
-				ptable_sector);
+			blockstat = mtd_block_isbad(main_mtd, ptable_sector);
 			if (blockstat < 0)
 				ptable_sector = 0; /* read error */
 			else if (blockstat)
diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c
index 9bfbca5d88d6..38519401196b 100644
--- a/drivers/mtd/inftlmount.c
+++ b/drivers/mtd/inftlmount.c
@@ -306,7 +306,8 @@ static int find_boot_record(struct INFTLrecord *inftl)
 			/* If any of the physical eraseblocks are bad, don't
 			   use the unit. */
 			for (physblock = 0; physblock < inftl->EraseSize; physblock += inftl->mbd.mtd->erasesize) {
-				if (inftl->mbd.mtd->block_isbad(inftl->mbd.mtd, i * inftl->EraseSize + physblock))
+				if (mtd_block_isbad(inftl->mbd.mtd,
+						    i * inftl->EraseSize + physblock))
 					inftl->PUtable[i] = BLOCK_RESERVED;
 			}
 		}
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 6d598b23cf3a..a499bf7a8214 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -886,7 +886,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (!mtd->block_isbad)
 			ret = -EOPNOTSUPP;
 		else
-			return mtd->block_isbad(mtd, offs);
+			return mtd_block_isbad(mtd, offs);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 4b7f825ce015..d0db5e61d5af 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -667,7 +667,7 @@ static int concat_block_isbad(struct mtd_info *mtd, loff_t ofs)
 			continue;
 		}
 
-		res = subdev->block_isbad(subdev, ofs);
+		res = mtd_block_isbad(subdev, ofs);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 7be2018ffbcc..bc43d2f7272c 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -170,7 +170,7 @@ static void mtdoops_workfunc_erase(struct work_struct *work)
 	}
 
 	while (mtd->block_isbad) {
-		ret = mtd->block_isbad(mtd, cxt->nextpage * record_size);
+		ret = mtd_block_isbad(mtd, cxt->nextpage * record_size);
 		if (!ret)
 			break;
 		if (ret < 0) {
@@ -254,7 +254,7 @@ static void find_next_position(struct mtdoops_context *cxt)
 
 	for (page = 0; page < cxt->oops_pages; page++) {
 		if (mtd->block_isbad &&
-		    mtd->block_isbad(mtd, page * record_size))
+		    mtd_block_isbad(mtd, page * record_size))
 			continue;
 		/* Assume the page is used */
 		mark_page_used(cxt, page);
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 8610750852ac..0e7dfc79d337 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -322,7 +322,7 @@ static int part_block_isbad(struct mtd_info *mtd, loff_t ofs)
 	if (ofs >= mtd->size)
 		return -EINVAL;
 	ofs += part->offset;
-	return part->master->block_isbad(part->master, ofs);
+	return mtd_block_isbad(part->master, ofs);
 }
 
 static int part_block_markbad(struct mtd_info *mtd, loff_t ofs)
@@ -553,8 +553,7 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 		uint64_t offs = 0;
 
 		while (offs < slave->mtd.size) {
-			if (master->block_isbad(master,
-						offs + slave->offset))
+			if (mtd_block_isbad(master, offs + slave->offset))
 				slave->mtd.ecc_stats.badblocks++;
 			offs += slave->mtd.erasesize;
 		}
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index cb794e761012..87aa0a6323c3 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -343,7 +343,7 @@ static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
 	offset = mtdswap_eb_offset(d, eb);
 
 	/* Check first if the block is bad. */
-	if (d->mtd->block_isbad && d->mtd->block_isbad(d->mtd, offset))
+	if (d->mtd->block_isbad && mtd_block_isbad(d->mtd, offset))
 		return MTDSWAP_SCANNED_BAD;
 
 	ops.ooblen = 2 * d->mtd->ecclayout->oobavail;
@@ -1061,7 +1061,7 @@ static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
 
 	if (mtd->block_isbad)
 		for (offset = 0; offset < size; offset += mtd->erasesize)
-			if (mtd->block_isbad(mtd, offset))
+			if (mtd_block_isbad(mtd, offset))
 				badcnt++;
 
 	return badcnt;
diff --git a/drivers/mtd/nftlmount.c b/drivers/mtd/nftlmount.c
index b068dc8a3666..156af9f87961 100644
--- a/drivers/mtd/nftlmount.c
+++ b/drivers/mtd/nftlmount.c
@@ -242,7 +242,8 @@ The new DiskOnChip driver already scanned the bad block table.  Just query it.
 			if (buf[i & (SECTORSIZE - 1)] != 0xff)
 				nftl->ReplUnitTable[i] = BLOCK_RESERVED;
 #endif
-			if (nftl->mbd.mtd->block_isbad(nftl->mbd.mtd, i * nftl->EraseSize))
+			if (mtd_block_isbad(nftl->mbd.mtd,
+					    i * nftl->EraseSize))
 				nftl->ReplUnitTable[i] = BLOCK_RESERVED;
 		}
 
diff --git a/drivers/mtd/redboot.c b/drivers/mtd/redboot.c
index 623d9b86d0d9..09bb81ea3a7e 100644
--- a/drivers/mtd/redboot.c
+++ b/drivers/mtd/redboot.c
@@ -79,7 +79,7 @@ static int parse_redboot_partitions(struct mtd_info *master,
 	if ( directory < 0 ) {
 		offset = master->size + directory * master->erasesize;
 		while (master->block_isbad && 
-		       master->block_isbad(master, offset)) {
+		       mtd_block_isbad(master, offset)) {
 			if (!offset) {
 			nogood:
 				printk(KERN_NOTICE "Failed to find a non-bad block to check for RedBoot partition table\n");
@@ -90,7 +90,7 @@ static int parse_redboot_partitions(struct mtd_info *master,
 	} else {
 		offset = directory * master->erasesize;
 		while (master->block_isbad && 
-		       master->block_isbad(master, offset)) {
+		       mtd_block_isbad(master, offset)) {
 			offset += master->erasesize;
 			if (offset == master->size)
 				goto nogood;
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index 0e6881338357..ab2a52a039c3 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -122,7 +122,7 @@ static int get_valid_cis_sector(struct mtd_info *mtd)
 	 * is not SSFDC formatted
 	 */
 	for (k = 0, offset = 0; k < 4; k++, offset += mtd->erasesize) {
-		if (!mtd->block_isbad(mtd, offset)) {
+		if (!mtd_block_isbad(mtd, offset)) {
 			ret = mtd_read(mtd, offset, SECTOR_SIZE, &retlen,
 				       sect_buf);
 
@@ -255,7 +255,7 @@ static int build_logical_block_map(struct ssfdcr_record *ssfdc)
 	for (phys_block = ssfdc->cis_block + 1; phys_block < ssfdc->map_len;
 			phys_block++) {
 		offset = (unsigned long)phys_block * ssfdc->erase_size;
-		if (mtd->block_isbad(mtd, offset))
+		if (mtd_block_isbad(mtd, offset))
 			continue;	/* skip bad blocks */
 
 		ret = read_raw_oob(mtd, offset, oob_buf);
diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/mtd_oobtest.c
index 81113885e086..ed9b62827f1b 100644
--- a/drivers/mtd/tests/mtd_oobtest.c
+++ b/drivers/mtd/tests/mtd_oobtest.c
@@ -329,7 +329,7 @@ static int is_block_bad(int ebnum)
 	int ret;
 	loff_t addr = ebnum * mtd->erasesize;
 
-	ret = mtd->block_isbad(mtd, addr);
+	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
 		printk(PRINT_PREF "block %d is bad\n", ebnum);
 	return ret;
diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/mtd_pagetest.c
index 83da97e54f97..8024eaf4c1ac 100644
--- a/drivers/mtd/tests/mtd_pagetest.c
+++ b/drivers/mtd/tests/mtd_pagetest.c
@@ -469,7 +469,7 @@ static int is_block_bad(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 	int ret;
 
-	ret = mtd->block_isbad(mtd, addr);
+	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
 		printk(PRINT_PREF "block %d is bad\n", ebnum);
 	return ret;
diff --git a/drivers/mtd/tests/mtd_readtest.c b/drivers/mtd/tests/mtd_readtest.c
index 5eaeada84284..ad5fd0df86ee 100644
--- a/drivers/mtd/tests/mtd_readtest.c
+++ b/drivers/mtd/tests/mtd_readtest.c
@@ -132,7 +132,7 @@ static int is_block_bad(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 	int ret;
 
-	ret = mtd->block_isbad(mtd, addr);
+	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
 		printk(PRINT_PREF "block %d is bad\n", ebnum);
 	return ret;
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
index c7b18e189082..ecb287847505 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/mtd_speedtest.c
@@ -296,7 +296,7 @@ static int is_block_bad(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 	int ret;
 
-	ret = mtd->block_isbad(mtd, addr);
+	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
 		printk(PRINT_PREF "block %d is bad\n", ebnum);
 	return ret;
diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c
index f8aac4b7e59a..4789c0ee3e9a 100644
--- a/drivers/mtd/tests/mtd_stresstest.c
+++ b/drivers/mtd/tests/mtd_stresstest.c
@@ -132,7 +132,7 @@ static int is_block_bad(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 	int ret;
 
-	ret = mtd->block_isbad(mtd, addr);
+	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
 		printk(PRINT_PREF "block %d is bad\n", ebnum);
 	return ret;
diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/mtd_subpagetest.c
index b90c01036b49..4b873d49fe6a 100644
--- a/drivers/mtd/tests/mtd_subpagetest.c
+++ b/drivers/mtd/tests/mtd_subpagetest.c
@@ -344,7 +344,7 @@ static int is_block_bad(int ebnum)
 	loff_t addr = ebnum * mtd->erasesize;
 	int ret;
 
-	ret = mtd->block_isbad(mtd, addr);
+	ret = mtd_block_isbad(mtd, addr);
 	if (ret)
 		printk(PRINT_PREF "block %d is bad\n", ebnum);
 	return ret;
diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c
index dd34a519fa7a..30c4ed9855ec 100644
--- a/drivers/mtd/tests/mtd_torturetest.c
+++ b/drivers/mtd/tests/mtd_torturetest.c
@@ -292,8 +292,7 @@ static int __init tort_init(void)
 	memset(&bad_ebs[0], 0, sizeof(int) * ebcnt);
 	if (mtd->block_isbad) {
 		for (i = eb; i < eb + ebcnt; i++) {
-			err = mtd->block_isbad(mtd,
-					       (loff_t)i * mtd->erasesize);
+			err = mtd_block_isbad(mtd, (loff_t)i * mtd->erasesize);
 
 			if (err < 0) {
 				printk(PRINT_PREF "block_isbad() returned %d "
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 8d832fc9e9e4..a1b683ad639e 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -634,7 +634,7 @@ int ubi_io_is_bad(const struct ubi_device *ubi, int pnum)
 	if (ubi->bad_allowed) {
 		int ret;
 
-		ret = mtd->block_isbad(mtd, (loff_t)pnum * ubi->peb_size);
+		ret = mtd_block_isbad(mtd, (loff_t)pnum * ubi->peb_size);
 		if (ret < 0)
 			ubi_err("error %d while checking if PEB %d is bad",
 				ret, pnum);
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 72f3960f44a9..83e1665e2574 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -455,7 +455,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
 	if (jffs2_cleanmarker_oob(c)) {
 		int ret;
 
-		if (c->mtd->block_isbad(c->mtd, jeb->offset))
+		if (mtd_block_isbad(c->mtd, jeb->offset))
 			return BLK_STATE_BADBLOCK;
 
 		ret = jffs2_check_nand_cleanmarker(c, jeb);
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 0ca7a07db6c1..136c7360a9b6 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -157,7 +157,7 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs)
 		return NULL;
 
 	*ofs = 0;
-	while (mtd->block_isbad(mtd, *ofs)) {
+	while (mtd_block_isbad(mtd, *ofs)) {
 		*ofs += mtd->erasesize;
 		if (*ofs >= mtd->size)
 			return NULL;
@@ -177,7 +177,7 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs)
 		return NULL;
 
 	*ofs = mtd->size - mtd->erasesize;
-	while (mtd->block_isbad(mtd, *ofs)) {
+	while (mtd_block_isbad(mtd, *ofs)) {
 		*ofs -= mtd->erasesize;
 		if (*ofs <= 0)
 			return NULL;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index d6b4aa177505..a307ad093a54 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -210,6 +210,7 @@ struct mtd_info {
 	int (*lock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
+	int (*block_isbad) (struct mtd_info *mtd, loff_t ofs);
 	int (*suspend) (struct mtd_info *mtd);
 	void (*resume) (struct mtd_info *mtd);
 
@@ -219,7 +220,6 @@ struct mtd_info {
 	struct backing_dev_info *backing_dev_info;
 
 	/* Bad block management functions */
-	int (*block_isbad) (struct mtd_info *mtd, loff_t ofs);
 	int (*block_markbad) (struct mtd_info *mtd, loff_t ofs);
 
 	struct notifier_block reboot_notifier;  /* default mode before reboot */
@@ -406,6 +406,11 @@ static inline void mtd_resume(struct mtd_info *mtd)
 	mtd->resume(mtd);
 }
 
+static inline int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
+{
+	return mtd->block_isbad(mtd, ofs);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From 5942ddbc500d1c9b75e571b656be97f65b26adfe Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 23 Dec 2011 19:37:38 +0200
Subject: mtd: introduce mtd_block_markbad interface

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/inftlmount.c           | 2 +-
 drivers/mtd/mtdchar.c              | 2 +-
 drivers/mtd/mtdconcat.c            | 2 +-
 drivers/mtd/mtdoops.c              | 2 +-
 drivers/mtd/mtdpart.c              | 2 +-
 drivers/mtd/mtdswap.c              | 2 +-
 drivers/mtd/nand/nandsim.c         | 2 +-
 drivers/mtd/nftlmount.c            | 2 +-
 drivers/mtd/onenand/onenand_base.c | 2 +-
 drivers/mtd/ubi/io.c               | 2 +-
 fs/jffs2/wbuf.c                    | 2 +-
 include/linux/mtd/mtd.h            | 9 ++++++---
 12 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/inftlmount.c b/drivers/mtd/inftlmount.c
index 38519401196b..4adc0374fb6b 100644
--- a/drivers/mtd/inftlmount.c
+++ b/drivers/mtd/inftlmount.c
@@ -424,7 +424,7 @@ int INFTL_formatblock(struct INFTLrecord *inftl, int block)
 fail:
 	/* could not format, update the bad block table (caller is responsible
 	   for setting the PUtable to BLOCK_RESERVED on failure) */
-	inftl->mbd.mtd->block_markbad(inftl->mbd.mtd, instr->addr);
+	mtd_block_markbad(inftl->mbd.mtd, instr->addr);
 	return -1;
 }
 
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index a499bf7a8214..15a3f6224be4 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -899,7 +899,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (!mtd->block_markbad)
 			ret = -EOPNOTSUPP;
 		else
-			return mtd->block_markbad(mtd, offs);
+			return mtd_block_markbad(mtd, offs);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index d0db5e61d5af..f694b51e7856 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -693,7 +693,7 @@ static int concat_block_markbad(struct mtd_info *mtd, loff_t ofs)
 			continue;
 		}
 
-		err = subdev->block_markbad(subdev, ofs);
+		err = mtd_block_markbad(subdev, ofs);
 		if (!err)
 			mtd->ecc_stats.badblocks++;
 		break;
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index bc43d2f7272c..69532a34e563 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -200,7 +200,7 @@ badblock:
 	}
 
 	if (mtd->block_markbad && ret == -EIO) {
-		ret = mtd->block_markbad(mtd, cxt->nextpage * record_size);
+		ret = mtd_block_markbad(mtd, cxt->nextpage * record_size);
 		if (ret < 0) {
 			printk(KERN_ERR "mtdoops: block_markbad failed, aborting\n");
 			return;
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 0e7dfc79d337..a3d44c3416b4 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -335,7 +335,7 @@ static int part_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	if (ofs >= mtd->size)
 		return -EINVAL;
 	ofs += part->offset;
-	res = part->master->block_markbad(part->master, ofs);
+	res = mtd_block_markbad(part->master, ofs);
 	if (!res)
 		mtd->ecc_stats.badblocks++;
 	return res;
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 87aa0a6323c3..4441c08b082d 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -279,7 +279,7 @@ static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb)
 
 	offset = mtdswap_eb_offset(d, eb);
 	dev_warn(d->dev, "Marking bad block at %08llx\n", offset);
-	ret = d->mtd->block_markbad(d->mtd, offset);
+	ret = mtd_block_markbad(d->mtd, offset);
 
 	if (ret) {
 		dev_warn(d->dev, "Mark block bad failed for block at %08llx "
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 34c03be77301..261f478f8cc3 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -737,7 +737,7 @@ static int parse_badblocks(struct nandsim *ns, struct mtd_info *mtd)
 			return -EINVAL;
 		}
 		offset = erase_block_no * ns->geom.secsz;
-		if (mtd->block_markbad(mtd, offset)) {
+		if (mtd_block_markbad(mtd, offset)) {
 			NS_ERR("invalid badblocks.\n");
 			return -EINVAL;
 		}
diff --git a/drivers/mtd/nftlmount.c b/drivers/mtd/nftlmount.c
index 156af9f87961..51b9d6af307f 100644
--- a/drivers/mtd/nftlmount.c
+++ b/drivers/mtd/nftlmount.c
@@ -356,7 +356,7 @@ int NFTL_formatblock(struct NFTLrecord *nftl, int block)
 fail:
 	/* could not format, update the bad block table (caller is responsible
 	   for setting the ReplUnitTable to BLOCK_RESERVED on failure) */
-	nftl->mbd.mtd->block_markbad(nftl->mbd.mtd, instr->addr);
+	mtd_block_markbad(nftl->mbd.mtd, instr->addr);
 	return -1;
 }
 
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index a8394730b4b6..dd278a284136 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -2645,7 +2645,7 @@ static int onenand_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	}
 
 	onenand_get_device(mtd, FL_WRITING);
-	ret = this->block_markbad(mtd, ofs);
+	ret = this->block_markbad(mtd, ofs);
 	onenand_release_device(mtd);
 	return ret;
 }
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index a1b683ad639e..5cde4e5ca3e5 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -669,7 +669,7 @@ int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum)
 	if (!ubi->bad_allowed)
 		return 0;
 
-	err = mtd->block_markbad(mtd, (loff_t)pnum * ubi->peb_size);
+	err = mtd_block_markbad(mtd, (loff_t)pnum * ubi->peb_size);
 	if (err)
 		ubi_err("cannot mark PEB %d bad, error %d", pnum, err);
 	return err;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index eae5be483682..fd96b757433f 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1134,7 +1134,7 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock *
 		return 1; // What else can we do?
 
 	printk(KERN_WARNING "JFFS2: marking eraseblock at %08x\n as bad", bad_offset);
-	ret = c->mtd->block_markbad(c->mtd, bad_offset);
+	ret = mtd_block_markbad(c->mtd, bad_offset);
 
 	if (ret) {
 		D1(printk(KERN_WARNING "jffs2_write_nand_badblock(): Write failed for block at %08x: error %d\n", jeb->offset, ret));
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index a307ad093a54..64aa54fba2df 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -211,6 +211,7 @@ struct mtd_info {
 	int (*unlock) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*is_locked) (struct mtd_info *mtd, loff_t ofs, uint64_t len);
 	int (*block_isbad) (struct mtd_info *mtd, loff_t ofs);
+	int (*block_markbad) (struct mtd_info *mtd, loff_t ofs);
 	int (*suspend) (struct mtd_info *mtd);
 	void (*resume) (struct mtd_info *mtd);
 
@@ -219,9 +220,6 @@ struct mtd_info {
 	 */
 	struct backing_dev_info *backing_dev_info;
 
-	/* Bad block management functions */
-	int (*block_markbad) (struct mtd_info *mtd, loff_t ofs);
-
 	struct notifier_block reboot_notifier;  /* default mode before reboot */
 
 	/* ECC status information */
@@ -411,6 +409,11 @@ static inline int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
 	return mtd->block_isbad(mtd, ofs);
 }
 
+static inline int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs)
+{
+	return mtd->block_markbad(mtd, ofs);
+}
+
 static inline struct mtd_info *dev_to_mtd(struct device *dev)
 {
 	return dev ? dev_get_drvdata(dev) : NULL;
-- 
cgit v1.2.3


From a88d2dc672192247a6f42c82d558db9bf9258bed Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 29 Dec 2011 11:06:10 +0200
Subject: mtd: move mtd->{get,put}_device functions up

Move the 'get_device()' and 'put_device()' functions up within
'struct mtd_info' to make them be close to other functions.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/mtd.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 64aa54fba2df..8ae37e9d45de 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -214,6 +214,12 @@ struct mtd_info {
 	int (*block_markbad) (struct mtd_info *mtd, loff_t ofs);
 	int (*suspend) (struct mtd_info *mtd);
 	void (*resume) (struct mtd_info *mtd);
+	/*
+	 * If the driver is something smart, like UBI, it may need to maintain
+	 * its own reference counting. The below functions are only for driver.
+	 */
+	int (*get_device) (struct mtd_info *mtd);
+	void (*put_device) (struct mtd_info *mtd);
 
 	/* Backing device capabilities for this device
 	 * - provides mmap capabilities
@@ -232,13 +238,6 @@ struct mtd_info {
 	struct module *owner;
 	struct device dev;
 	int usecount;
-
-	/* If the driver is something smart, like UBI, it may need to maintain
-	 * its own reference counting. The below functions are only for driver.
-	 * The driver may register its callbacks. These callbacks are not
-	 * supposed to be called by MTD users */
-	int (*get_device) (struct mtd_info *mtd);
-	void (*put_device) (struct mtd_info *mtd);
 };
 
 /*
-- 
cgit v1.2.3


From 9cf075f8656524abc44ad3ff2ec3834fe76f186f Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 28 Dec 2011 18:14:49 +0200
Subject: mtd: always initialize retlen to zero

Make sure that the retlen is set to 0 in case of error. This harmonizes
drivers - some set it to 0 in some error cases and do not write anything
in other error cases. Now we can do this consistently for all drivers.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/mtd.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 8ae37e9d45de..a09077aca45b 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -258,6 +258,7 @@ static inline int mtd_erase(struct mtd_info *mtd, struct erase_info *instr)
 static inline int mtd_point(struct mtd_info *mtd, loff_t from, size_t len,
 			    size_t *retlen, void **virt, resource_size_t *phys)
 {
+	*retlen = 0;
 	return mtd->point(mtd, from, len, retlen, virt, phys);
 }
 
@@ -289,6 +290,7 @@ static inline int mtd_read(struct mtd_info *mtd, loff_t from, size_t len,
 static inline int mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
 			    size_t *retlen, const u_char *buf)
 {
+	*retlen = 0;
 	return mtd->write(mtd, to, len, retlen, buf);
 }
 
@@ -302,18 +304,21 @@ static inline int mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
 static inline int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
 				  size_t *retlen, const u_char *buf)
 {
+	*retlen = 0;
 	return mtd->panic_write(mtd, to, len, retlen, buf);
 }
 
 static inline int mtd_read_oob(struct mtd_info *mtd, loff_t from,
 			       struct mtd_oob_ops *ops)
 {
+	ops->retlen = ops->oobretlen = 0;
 	return mtd->read_oob(mtd, from, ops);
 }
 
 static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to,
 				struct mtd_oob_ops *ops)
 {
+	ops->retlen = ops->oobretlen = 0;
 	return mtd->write_oob(mtd, to, ops);
 }
 
@@ -332,6 +337,7 @@ static inline int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
 					 size_t len, size_t *retlen,
 					 u_char *buf)
 {
+	*retlen = 0;
 	return mtd->read_fact_prot_reg(mtd, from, len, retlen, buf);
 }
 
@@ -346,6 +352,7 @@ static inline int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from,
 					 size_t len, size_t *retlen,
 					 u_char *buf)
 {
+	*retlen = 0;
 	return mtd->read_user_prot_reg(mtd, from, len, retlen, buf);
 }
 
@@ -353,6 +360,7 @@ static inline int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to,
 					  size_t len, size_t *retlen,
 					  u_char *buf)
 {
+	*retlen = 0;
 	return mtd->write_user_prot_reg(mtd, to, len, retlen, buf);
 }
 
@@ -369,6 +377,7 @@ static inline int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 static inline int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 			     unsigned long count, loff_t to, size_t *retlen)
 {
+	*retlen = 0;
 	return mtd->writev(mtd, vecs, count, to, retlen);
 }
 
-- 
cgit v1.2.3


From d5de20a9a1c5ad68c07e017d11f6dbb5e289750c Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 29 Dec 2011 18:00:29 +0200
Subject: mtd: kill dev_to_mtd helper

... since it is not needed because the generic 'dev_get_drvdata()' can be
used instead.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Acked-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdcore.c   | 25 +++++++++++++------------
 include/linux/mtd/mtd.h |  5 -----
 2 files changed, 13 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 376fbfdb09aa..8bea2d0fdb20 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -107,7 +107,8 @@ static LIST_HEAD(mtd_notifiers);
  */
 static void mtd_release(struct device *dev)
 {
-	dev_t index = MTD_DEVT(dev_to_mtd(dev)->index);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
+	dev_t index = MTD_DEVT(mtd->index);
 
 	/* remove /dev/mtdXro node if needed */
 	if (index)
@@ -116,7 +117,7 @@ static void mtd_release(struct device *dev)
 
 static int mtd_cls_suspend(struct device *dev, pm_message_t state)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	if (mtd && mtd->suspend)
 		return mtd_suspend(mtd);
@@ -126,7 +127,7 @@ static int mtd_cls_suspend(struct device *dev, pm_message_t state)
 
 static int mtd_cls_resume(struct device *dev)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 	
 	if (mtd && mtd->resume)
 		mtd_resume(mtd);
@@ -136,7 +137,7 @@ static int mtd_cls_resume(struct device *dev)
 static ssize_t mtd_type_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 	char *type;
 
 	switch (mtd->type) {
@@ -172,7 +173,7 @@ static DEVICE_ATTR(type, S_IRUGO, mtd_type_show, NULL);
 static ssize_t mtd_flags_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	return snprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)mtd->flags);
 
@@ -182,7 +183,7 @@ static DEVICE_ATTR(flags, S_IRUGO, mtd_flags_show, NULL);
 static ssize_t mtd_size_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	return snprintf(buf, PAGE_SIZE, "%llu\n",
 		(unsigned long long)mtd->size);
@@ -193,7 +194,7 @@ static DEVICE_ATTR(size, S_IRUGO, mtd_size_show, NULL);
 static ssize_t mtd_erasesize_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	return snprintf(buf, PAGE_SIZE, "%lu\n", (unsigned long)mtd->erasesize);
 
@@ -203,7 +204,7 @@ static DEVICE_ATTR(erasesize, S_IRUGO, mtd_erasesize_show, NULL);
 static ssize_t mtd_writesize_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	return snprintf(buf, PAGE_SIZE, "%lu\n", (unsigned long)mtd->writesize);
 
@@ -213,7 +214,7 @@ static DEVICE_ATTR(writesize, S_IRUGO, mtd_writesize_show, NULL);
 static ssize_t mtd_subpagesize_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 	unsigned int subpagesize = mtd->writesize >> mtd->subpage_sft;
 
 	return snprintf(buf, PAGE_SIZE, "%u\n", subpagesize);
@@ -224,7 +225,7 @@ static DEVICE_ATTR(subpagesize, S_IRUGO, mtd_subpagesize_show, NULL);
 static ssize_t mtd_oobsize_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	return snprintf(buf, PAGE_SIZE, "%lu\n", (unsigned long)mtd->oobsize);
 
@@ -234,7 +235,7 @@ static DEVICE_ATTR(oobsize, S_IRUGO, mtd_oobsize_show, NULL);
 static ssize_t mtd_numeraseregions_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	return snprintf(buf, PAGE_SIZE, "%u\n", mtd->numeraseregions);
 
@@ -245,7 +246,7 @@ static DEVICE_ATTR(numeraseregions, S_IRUGO, mtd_numeraseregions_show,
 static ssize_t mtd_name_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct mtd_info *mtd = dev_to_mtd(dev);
+	struct mtd_info *mtd = dev_get_drvdata(dev);
 
 	return snprintf(buf, PAGE_SIZE, "%s\n", mtd->name);
 
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index a09077aca45b..cb33cc12e18f 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -422,11 +422,6 @@ static inline int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	return mtd->block_markbad(mtd, ofs);
 }
 
-static inline struct mtd_info *dev_to_mtd(struct device *dev)
-{
-	return dev ? dev_get_drvdata(dev) : NULL;
-}
-
 static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd)
 {
 	if (mtd->erasesize_shift)
-- 
cgit v1.2.3


From bac972777403f810d83062dd0d0303746e466ece Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 15:46:40 +0200
Subject: mtd: remove unused default_mtd_readv prototype

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/mtd.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index cb33cc12e18f..671c89289fc3 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -485,9 +485,6 @@ extern int unregister_mtd_user (struct mtd_notifier *old);
 int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 		       unsigned long count, loff_t to, size_t *retlen);
 
-int default_mtd_readv(struct mtd_info *mtd, struct kvec *vecs,
-		      unsigned long count, loff_t from, size_t *retlen);
-
 void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size);
 
 void mtd_erase_callback(struct erase_info *instr);
-- 
cgit v1.2.3


From 52b020317f65114eeba2ee2cfa70a51a286f1d8a Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 15:57:25 +0200
Subject: mtd: clean-up the default_mtd_writev function

1. Teach 'mtd_write()' function to return '-EROFS' if the write method
   is undefined, and remove the corresponding check from
   'default_mtd_writev()'.
2. Do not test 'retlen' for NULL - it cannot be NULL.
3. A few minor coding style clean-ups.
4. Add a kerneldoc comment

Additionally, minor fixes to the kerneldoc comments of the neighbor function.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdcore.c   | 41 ++++++++++++++++++++++-------------------
 include/linux/mtd/mtd.h |  2 ++
 2 files changed, 24 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 8bea2d0fdb20..85a3f197e7f0 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -683,10 +683,17 @@ void __put_mtd_device(struct mtd_info *mtd)
 	module_put(mtd->owner);
 }
 
-/* default_mtd_writev - default mtd writev method for MTD devices that
- *			don't implement their own
+/*
+ * default_mtd_writev - the default writev method
+ * @mtd: mtd device description object pointer
+ * @vecs: the vectors to write
+ * @count: count of vectors in @vecs
+ * @to: the MTD device offset to write to
+ * @retlen: on exit contains the count of bytes written to the MTD device.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
  */
-
 int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 		       unsigned long count, loff_t to, size_t *retlen)
 {
@@ -694,28 +701,24 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 	size_t totlen = 0, thislen;
 	int ret = 0;
 
-	if(!mtd->write) {
-		ret = -EROFS;
-	} else {
-		for (i=0; i<count; i++) {
-			if (!vecs[i].iov_len)
-				continue;
-			ret = mtd_write(mtd, to, vecs[i].iov_len, &thislen,
-					vecs[i].iov_base);
-			totlen += thislen;
-			if (ret || thislen != vecs[i].iov_len)
-				break;
-			to += vecs[i].iov_len;
-		}
+	for (i = 0; i < count; i++) {
+		if (!vecs[i].iov_len)
+			continue;
+		ret = mtd_write(mtd, to, vecs[i].iov_len, &thislen,
+				vecs[i].iov_base);
+		totlen += thislen;
+		if (ret || thislen != vecs[i].iov_len)
+			break;
+		to += vecs[i].iov_len;
 	}
-	if (retlen)
-		*retlen = totlen;
+	*retlen = totlen;
 	return ret;
 }
 
 /**
  * mtd_kmalloc_up_to - allocate a contiguous buffer up to the specified size
- * @size: A pointer to the ideal or maximum size of the allocation. Points
+ * @mtd: mtd device description object pointer
+ * @size: a pointer to the ideal or maximum size of the allocation, points
  *        to the actual allocation size on success.
  *
  * This routine attempts to allocate a contiguous kernel buffer up to
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 671c89289fc3..f0dd5a305b89 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -291,6 +291,8 @@ static inline int mtd_write(struct mtd_info *mtd, loff_t to, size_t len,
 			    size_t *retlen, const u_char *buf)
 {
 	*retlen = 0;
+	if (!mtd->write)
+		return -EROFS;
 	return mtd->write(mtd, to, len, retlen, buf);
 }
 
-- 
cgit v1.2.3


From fc002e3c320602d0e206f607aca0460540d7637a Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 28 Dec 2011 18:35:07 +0200
Subject: mtd: introduce mtd_has_oob helper

We are working in the direction of making sure that MTD clients do not
use 'mtd->func' pointers directly. In some places we want to know if
OOB operations are supported by an MTD device. Introduce 'mtd_has_oob()'
helper for these purposes.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 2 +-
 drivers/mtd/sm_ftl.c    | 4 ++--
 include/linux/mtd/mtd.h | 5 +++++
 3 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 83b0c82e9c94..c501eec17b38 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -1004,7 +1004,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 			break;
 
 		case MTD_FILE_MODE_RAW:
-			if (!mtd->read_oob || !mtd->write_oob)
+			if (!mtd_has_oob(mtd))
 				return -EOPNOTSUPP;
 			mfi->mode = arg;
 
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index 4ec2af7fb845..072ed5970e2f 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -645,8 +645,8 @@ int sm_get_media_info(struct sm_ftl *ftl, struct mtd_info *mtd)
 	if (!ftl->smallpagenand && mtd->oobsize < SM_OOB_SIZE)
 		return -ENODEV;
 
-	/* We use these functions for IO */
-	if (!mtd->read_oob || !mtd->write_oob)
+	/* We use OOB */
+	if (!mtd_has_oob(mtd))
 		return -ENODEV;
 
 	/* Find geometry information */
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index f0dd5a305b89..478701566ba7 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -454,6 +454,11 @@ static inline uint32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd)
 	return do_div(sz, mtd->writesize);
 }
 
+static inline int mtd_has_oob(const struct mtd_info *mtd)
+{
+	return mtd->read_oob && mtd->write_oob;
+}
+
 	/* Kernel-side ioctl definitions */
 
 struct mtd_partition;
-- 
cgit v1.2.3


From 10934478e44d9a5a7b16dadd89094fb608cf101e Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 28 Dec 2011 15:55:42 +0200
Subject: mtd: do not use mtd->point directly

Remove direct usage of the "mtd->point" function pointer. Instead,
test the mtd_point() return code for '-EOPNOTSUPP'.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 fs/jffs2/erase.c        |  9 ++++-----
 fs/jffs2/readinode.c    | 18 ++++++++----------
 fs/jffs2/scan.c         |  2 +-
 include/linux/mtd/mtd.h |  2 ++
 4 files changed, 15 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index c59d642cade2..a01cdad6aad1 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -336,12 +336,11 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
 	uint32_t ofs;
 	size_t retlen;
 	int ret = -EIO;
+	unsigned long *wordebuf;
 
-	if (c->mtd->point) {
-		unsigned long *wordebuf;
-
-		ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen,
-				&ebuf, NULL);
+	ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen,
+			&ebuf, NULL);
+	if (ret != -EOPNOTSUPP) {
 		if (ret) {
 			D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
 			goto do_flash_read;
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index fca2f84e1add..3093ac4fb24c 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -62,17 +62,15 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
 #ifndef __ECOS
 	/* TODO: instead, incapsulate point() stuff to jffs2_flash_read(),
 	 * adding and jffs2_flash_read_end() interface. */
-	if (c->mtd->point) {
-		err = mtd_point(c->mtd, ofs, len, &retlen, (void **)&buffer,
-				NULL);
-		if (!err && retlen < len) {
-			JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
-			mtd_unpoint(c->mtd, ofs, retlen);
-		} else if (err)
+	err = mtd_point(c->mtd, ofs, len, &retlen, (void **)&buffer, NULL);
+	if (!err && retlen < len) {
+		JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
+		mtd_unpoint(c->mtd, ofs, retlen);
+	} else if (err) {
+		if (err != -EOPNOTSUPP)
 			JFFS2_WARNING("MTD point failed: error code %d.\n", err);
-		else
-			pointed = 1; /* succefully pointed to device */
-	}
+	} else
+		pointed = 1; /* succefully pointed to device */
 #endif
 
 	if (!pointed) {
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 83e1665e2574..f99464833bb2 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -105,7 +105,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
 			mtd_unpoint(c->mtd, 0, pointlen);
 			flashbuf = NULL;
 		}
-		if (ret)
+		if (ret && ret != -EOPNOTSUPP)
 			D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
 	}
 #endif
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 478701566ba7..b355a83e7cc2 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -259,6 +259,8 @@ static inline int mtd_point(struct mtd_info *mtd, loff_t from, size_t len,
 			    size_t *retlen, void **virt, resource_size_t *phys)
 {
 	*retlen = 0;
+	if (!mtd->point)
+		return -EOPNOTSUPP;
 	return mtd->point(mtd, from, len, retlen, virt, phys);
 }
 
-- 
cgit v1.2.3


From cd621274b0ec747db8dedbf857624c067f481976 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 14:31:57 +0200
Subject: mtd: do not use mtd->get_unmapped_area directly

Remove direct usage of mtd->get_unmapped_area. Instead, just call
'mtd_get_unmapped_area()' which will return -EOPNOTSUPP if the function
is not implemented and test for this error code.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 26 +++++++++++---------------
 drivers/mtd/mtdconcat.c |  6 +-----
 include/linux/mtd/mtd.h |  2 ++
 3 files changed, 14 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index c501eec17b38..55f0961103a7 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -1124,25 +1124,21 @@ static unsigned long mtdchar_get_unmapped_area(struct file *file,
 {
 	struct mtd_file_info *mfi = file->private_data;
 	struct mtd_info *mtd = mfi->mtd;
+	unsigned long offset;
+	int ret;
 
-	if (mtd->get_unmapped_area) {
-		unsigned long offset;
-
-		if (addr != 0)
-			return (unsigned long) -EINVAL;
-
-		if (len > mtd->size || pgoff >= (mtd->size >> PAGE_SHIFT))
-			return (unsigned long) -EINVAL;
+	if (addr != 0)
+		return (unsigned long) -EINVAL;
 
-		offset = pgoff << PAGE_SHIFT;
-		if (offset > mtd->size - len)
-			return (unsigned long) -EINVAL;
+	if (len > mtd->size || pgoff >= (mtd->size >> PAGE_SHIFT))
+		return (unsigned long) -EINVAL;
 
-		return mtd_get_unmapped_area(mtd, len, offset, flags);
-	}
+	offset = pgoff << PAGE_SHIFT;
+	if (offset > mtd->size - len)
+		return (unsigned long) -EINVAL;
 
-	/* can't map directly */
-	return (unsigned long) -ENOSYS;
+	ret = mtd_get_unmapped_area(mtd, len, offset, flags);
+	return ret == -EOPNOTSUPP ? -ENOSYS : ret;
 }
 #endif
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index f694b51e7856..9119f76f87ff 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -726,11 +726,7 @@ static unsigned long concat_get_unmapped_area(struct mtd_info *mtd,
 		if (offset + len > subdev->size)
 			return (unsigned long) -EINVAL;
 
-		if (subdev->get_unmapped_area)
-			return mtd_get_unmapped_area(subdev, len, offset,
-						     flags);
-
-		break;
+		return mtd_get_unmapped_area(subdev, len, offset, flags);
 	}
 
 	return (unsigned long) -ENOSYS;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index b355a83e7cc2..2c2a92247e5a 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -280,6 +280,8 @@ static inline unsigned long mtd_get_unmapped_area(struct mtd_info *mtd,
 						  unsigned long offset,
 						  unsigned long flags)
 {
+	if (!mtd->get_unmapped_area)
+		return -EOPNOTSUPP;
 	return mtd->get_unmapped_area(mtd, len, offset, flags);
 }
 
-- 
cgit v1.2.3


From 016c1291ce70a22f15f666441a4fd2f0b450375b Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 28 Dec 2011 17:27:18 +0200
Subject: mtd: mtdoops: do not use mtd->panic_write directly

Instead of checking if 'mtd->panic_write' is defined, call 'mtd_panic_write()'
and check the error code - '-EOPNOTSUPP' will be returned if the function is
not defined.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdoops.c   | 17 ++++++++---------
 include/linux/mtd/mtd.h |  2 ++
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 69532a34e563..c8540b8a7fc6 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -221,10 +221,14 @@ static void mtdoops_write(struct mtdoops_context *cxt, int panic)
 	hdr[0] = cxt->nextcount;
 	hdr[1] = MTDOOPS_KERNMSG_MAGIC;
 
-	if (panic)
+	if (panic) {
 		ret = mtd_panic_write(mtd, cxt->nextpage * record_size,
 				      record_size, &retlen, cxt->oops_buf);
-	else
+		if (ret == -EOPNOTSUPP) {
+			printk(KERN_ERR "mtdoops: Cannot write from panic without panic_write\n");
+			return;
+		}
+	} else
 		ret = mtd_write(mtd, cxt->nextpage * record_size,
 				record_size, &retlen, cxt->oops_buf);
 
@@ -330,13 +334,8 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper,
 	memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
 
 	/* Panics must be written immediately */
-	if (reason != KMSG_DUMP_OOPS) {
-		if (!cxt->mtd->panic_write)
-			printk(KERN_ERR "mtdoops: Cannot write from panic without panic_write\n");
-		else
-			mtdoops_write(cxt, 1);
-		return;
-	}
+	if (reason != KMSG_DUMP_OOPS)
+		mtdoops_write(cxt, 1);
 
 	/* For other cases, schedule work to write it "nicely" */
 	schedule_work(&cxt->work_write);
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 2c2a92247e5a..b72964049cdc 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -311,6 +311,8 @@ static inline int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
 				  size_t *retlen, const u_char *buf)
 {
 	*retlen = 0;
+	if (!mtd->panic_write)
+		return -EOPNOTSUPP;
 	return mtd->panic_write(mtd, to, len, retlen, buf);
 }
 
-- 
cgit v1.2.3


From dac2639f9833e858139d7e07f6ee45fb2191a9f2 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 28 Dec 2011 17:50:34 +0200
Subject: mtd: do not use mtd->read_oob directly

Instead of checking whether 'mtd->read_oob' is defined, just call
'mtd_read_oob()' and handle the '-EOPNOTSUPP' error which will be returned
if the function is undefined.

Additionally, make 'mtd_write_oob()' return '-EOPNOTSUPP' if the function
is undefined.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 9 ++-------
 include/linux/mtd/mtd.h | 4 ++++
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 55f0961103a7..287ff0d35848 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -452,13 +452,8 @@ static int mtdchar_readoob(struct file *file, struct mtd_info *mtd,
 	if (length > 4096)
 		return -EINVAL;
 
-	if (!mtd->read_oob)
-		ret = -EOPNOTSUPP;
-	else
-		ret = access_ok(VERIFY_WRITE, ptr,
-				length) ? 0 : -EFAULT;
-	if (ret)
-		return ret;
+	if (!access_ok(VERIFY_WRITE, ptr, length))
+		return -EFAULT;
 
 	ops.ooblen = length;
 	ops.ooboffs = start & (mtd->writesize - 1);
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index b72964049cdc..721a63ffeb96 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -320,6 +320,8 @@ static inline int mtd_read_oob(struct mtd_info *mtd, loff_t from,
 			       struct mtd_oob_ops *ops)
 {
 	ops->retlen = ops->oobretlen = 0;
+	if (!mtd->read_oob)
+		return -EOPNOTSUPP;
 	return mtd->read_oob(mtd, from, ops);
 }
 
@@ -327,6 +329,8 @@ static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to,
 				struct mtd_oob_ops *ops)
 {
 	ops->retlen = ops->oobretlen = 0;
+	if (!mtd->write_oob)
+		return -EOPNOTSUPP;
 	return mtd->write_oob(mtd, to, ops);
 }
 
-- 
cgit v1.2.3


From 87e858a97e8a7010aedc01db7cd31cc7c02b0b6a Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 28 Dec 2011 18:47:46 +0200
Subject: mtd: do not use mtd->get_*_prot_info directly

Instead, call 'mtd_get_*_prot_info()' and check for '-EOPNOTSUPP'. While
on it, fix the return code from '-EOPNOTSUPP' to '-EINVAL' for the case
when the mode parameter is invalid.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 8 +++-----
 include/linux/mtd/mtd.h | 4 ++++
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 287ff0d35848..49340dc1b107 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -919,17 +919,15 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		struct otp_info *buf = kmalloc(4096, GFP_KERNEL);
 		if (!buf)
 			return -ENOMEM;
-		ret = -EOPNOTSUPP;
 		switch (mfi->mode) {
 		case MTD_FILE_MODE_OTP_FACTORY:
-			if (mtd->get_fact_prot_info)
-				ret = mtd_get_fact_prot_info(mtd, buf, 4096);
+			ret = mtd_get_fact_prot_info(mtd, buf, 4096);
 			break;
 		case MTD_FILE_MODE_OTP_USER:
-			if (mtd->get_user_prot_info)
-				ret = mtd_get_user_prot_info(mtd, buf, 4096);
+			ret = mtd_get_user_prot_info(mtd, buf, 4096);
 			break;
 		default:
+			ret = -EINVAL;
 			break;
 		}
 		if (ret >= 0) {
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 721a63ffeb96..7122efdc6d99 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -342,6 +342,8 @@ static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to,
 static inline int mtd_get_fact_prot_info(struct mtd_info *mtd,
 					 struct otp_info *buf, size_t len)
 {
+	if (!mtd->get_fact_prot_info)
+		return -EOPNOTSUPP;
 	return mtd->get_fact_prot_info(mtd, buf, len);
 }
 
@@ -357,6 +359,8 @@ static inline int mtd_get_user_prot_info(struct mtd_info *mtd,
 					 struct otp_info *buf,
 					 size_t len)
 {
+	if (!mtd->get_user_prot_info)
+		return -EOPNOTSUPP;
 	return mtd->get_user_prot_info(mtd, buf, len);
 }
 
-- 
cgit v1.2.3


From b6de3d6cb63427178c4f1df88b81d1ceee637e6f Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 29 Dec 2011 10:06:32 +0200
Subject: mtd: do not use mtd->read_*_prot_reg directly

Instead, call 'mtd_read_*_prot_reg()' and check for -EOPNOTSUPP.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 18 ++++++++++--------
 include/linux/mtd/mtd.h |  4 ++++
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 49340dc1b107..4e8e5fbc1e13 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -366,20 +366,22 @@ static void mtdchar_erase_callback (struct erase_info *instr)
 static int otp_select_filemode(struct mtd_file_info *mfi, int mode)
 {
 	struct mtd_info *mtd = mfi->mtd;
+	size_t retlen;
 	int ret = 0;
 
+	/*
+	 * Make a fake call to mtd_read_fact_prot_reg() to check if OTP
+	 * operations are supported.
+	 */
+	if (mtd_read_fact_prot_reg(mtd, -1, -1, &retlen, NULL) == -EOPNOTSUPP)
+		return -EOPNOTSUPP;
+
 	switch (mode) {
 	case MTD_OTP_FACTORY:
-		if (!mtd->read_fact_prot_reg)
-			ret = -EOPNOTSUPP;
-		else
-			mfi->mode = MTD_FILE_MODE_OTP_FACTORY;
+		mfi->mode = MTD_FILE_MODE_OTP_FACTORY;
 		break;
 	case MTD_OTP_USER:
-		if (!mtd->read_fact_prot_reg)
-			ret = -EOPNOTSUPP;
-		else
-			mfi->mode = MTD_FILE_MODE_OTP_USER;
+		mfi->mode = MTD_FILE_MODE_OTP_USER;
 		break;
 	default:
 		ret = -EINVAL;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 7122efdc6d99..e488cf910914 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -352,6 +352,8 @@ static inline int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from,
 					 u_char *buf)
 {
 	*retlen = 0;
+	if (!mtd->read_fact_prot_reg)
+		return -EOPNOTSUPP;
 	return mtd->read_fact_prot_reg(mtd, from, len, retlen, buf);
 }
 
@@ -369,6 +371,8 @@ static inline int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from,
 					 u_char *buf)
 {
 	*retlen = 0;
+	if (!mtd->read_user_prot_reg)
+		return -EOPNOTSUPP;
 	return mtd->read_user_prot_reg(mtd, from, len, retlen, buf);
 }
 
-- 
cgit v1.2.3


From 27c151a5e52efaa46d0938984f2ef591bdcb6d5b Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 29 Dec 2011 10:39:20 +0200
Subject: mtd: do not use mtd->write_user_prot_reg directly

Instead, just call 'mtd_write_user_prot_reg()' and check the '-EOPNOTSUPP' return
code.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 4 ----
 include/linux/mtd/mtd.h | 2 ++
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 4e8e5fbc1e13..25bbbc3aa665 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -310,10 +310,6 @@ static ssize_t mtdchar_write(struct file *file, const char __user *buf, size_t c
 			ret = -EROFS;
 			break;
 		case MTD_FILE_MODE_OTP_USER:
-			if (!mtd->write_user_prot_reg) {
-				ret = -EOPNOTSUPP;
-				break;
-			}
 			ret = mtd_write_user_prot_reg(mtd, *ppos, len,
 						      &retlen, kbuf);
 			break;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index e488cf910914..7cd56d2b9419 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -381,6 +381,8 @@ static inline int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to,
 					  u_char *buf)
 {
 	*retlen = 0;
+	if (!mtd->write_user_prot_reg)
+		return -EOPNOTSUPP;
 	return mtd->write_user_prot_reg(mtd, to, len, retlen, buf);
 }
 
-- 
cgit v1.2.3


From e2936b2af5562c8c66060e2bc2ae2e209d0acd3d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 29 Dec 2011 10:45:04 +0200
Subject: mtd: do not use mtd->lock_user_prot_reg directly

Instead, check the -EOPNOTSUPP return code of 'mtd_lock_user_prot_reg()'.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 2 --
 include/linux/mtd/mtd.h | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 25bbbc3aa665..2020a169ed9c 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -949,8 +949,6 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 			return -EINVAL;
 		if (copy_from_user(&oinfo, argp, sizeof(oinfo)))
 			return -EFAULT;
-		if (!mtd->lock_user_prot_reg)
-			return -EOPNOTSUPP;
 		ret = mtd_lock_user_prot_reg(mtd, oinfo.start, oinfo.length);
 		break;
 	}
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 7cd56d2b9419..a994129ede55 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -389,6 +389,8 @@ static inline int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to,
 static inline int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 					 size_t len)
 {
+	if (!mtd->lock_user_prot_reg)
+		return -EOPNOTSUPP;
 	return mtd->lock_user_prot_reg(mtd, from, len);
 }
 
-- 
cgit v1.2.3


From 1dbebd32562b3c2caeca35960e5cb00bfcc12900 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 16:23:41 +0200
Subject: mtd: harmonize mtd_writev usage

This patch makes the 'mtd_writev()' function more usable and logical. We first
teach it to fall back to the 'default_mtd_writev()' function if the MTD driver
does not define its own '->writev()' method. Then we make block2mtd and JFFS2
just use 'mtd_writev()' instead of the 'default_mtd_writev()' function. This
means we can now stop exporting 'default_mtd_writev()' and instead export
'mtd_writev()'. This is much cleaner and more logical, and it also allows us
to get rid of another direct 'mtd->writev' access in JFFS2.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/devices/block2mtd.c |  2 +-
 drivers/mtd/mtdcore.c           | 26 +++++++++++++++++++++++---
 fs/jffs2/writev.c               |  6 +-----
 include/linux/mtd/mtd.h         | 16 ++--------------
 4 files changed, 27 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index b78f23169d4e..c16f6b4e8938 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -288,7 +288,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size)
 	dev->mtd.flags = MTD_CAP_RAM;
 	dev->mtd.erase = block2mtd_erase;
 	dev->mtd.write = block2mtd_write;
-	dev->mtd.writev = default_mtd_writev;
+	dev->mtd.writev = mtd_writev;
 	dev->mtd.sync = block2mtd_sync;
 	dev->mtd.read = block2mtd_read;
 	dev->mtd.priv = dev;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 53a200f722b6..4d0f3e557bd1 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -696,8 +696,8 @@ EXPORT_SYMBOL_GPL(__put_mtd_device);
  * This function returns zero in case of success and a negative error code in
  * case of failure.
  */
-int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
-		       unsigned long count, loff_t to, size_t *retlen)
+static int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
+			      unsigned long count, loff_t to, size_t *retlen)
 {
 	unsigned long i;
 	size_t totlen = 0, thislen;
@@ -716,7 +716,27 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 	*retlen = totlen;
 	return ret;
 }
-EXPORT_SYMBOL_GPL(default_mtd_writev);
+
+/*
+ * mtd_writev - the vector-based MTD write method
+ * @mtd: mtd device description object pointer
+ * @vecs: the vectors to write
+ * @count: count of vectors in @vecs
+ * @to: the MTD device offset to write to
+ * @retlen: on exit contains the count of bytes written to the MTD device.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
+	       unsigned long count, loff_t to, size_t *retlen)
+{
+	*retlen = 0;
+	if (!mtd->writev)
+		return default_mtd_writev(mtd, vecs, count, to, retlen);
+	return mtd->writev(mtd, vecs, count, to, retlen);
+}
+EXPORT_SYMBOL_GPL(mtd_writev);
 
 /**
  * mtd_kmalloc_up_to - allocate a contiguous buffer up to the specified size
diff --git a/fs/jffs2/writev.c b/fs/jffs2/writev.c
index 8d704073f8b0..a1bda9dab3f8 100644
--- a/fs/jffs2/writev.c
+++ b/fs/jffs2/writev.c
@@ -26,11 +26,7 @@ int jffs2_flash_direct_writev(struct jffs2_sb_info *c, const struct kvec *vecs,
 		}
 	}
 
-	if (c->mtd->writev)
-		return mtd_writev(c->mtd, vecs, count, to, retlen);
-	else {
-		return default_mtd_writev(c->mtd, vecs, count, to, retlen);
-	}
+	return mtd_writev(c->mtd, vecs, count, to, retlen);
 }
 
 int jffs2_flash_direct_write(struct jffs2_sb_info *c, loff_t ofs, size_t len,
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index a994129ede55..a58ecf4d1f80 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -394,16 +394,8 @@ static inline int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from,
 	return mtd->lock_user_prot_reg(mtd, from, len);
 }
 
-/*
- * kvec-based read/write method. NB: The 'count' parameter is the number of
- * _vectors_, each of which contains an (ofs, len) tuple.
- */
-static inline int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
-			     unsigned long count, loff_t to, size_t *retlen)
-{
-	*retlen = 0;
-	return mtd->writev(mtd, vecs, count, to, retlen);
-}
+int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
+	       unsigned long count, loff_t to, size_t *retlen);
 
 static inline void mtd_sync(struct mtd_info *mtd)
 {
@@ -510,10 +502,6 @@ struct mtd_notifier {
 
 extern void register_mtd_user (struct mtd_notifier *new);
 extern int unregister_mtd_user (struct mtd_notifier *old);
-
-int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
-		       unsigned long count, loff_t to, size_t *retlen);
-
 void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size);
 
 void mtd_erase_callback(struct erase_info *instr);
-- 
cgit v1.2.3


From 327cf2922b4edf0439b219469722d2a502e37349 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 16:35:35 +0200
Subject: mtd: do not use mtd->sync directly

This patch teaches 'mtd_sync()' to do nothing when the MTD driver does
not have the '->sync()' method, which allows us to remove all direct
'mtd->sync' accesses.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/ftl.c       | 3 +--
 drivers/mtd/mtdblock.c  | 7 ++-----
 drivers/mtd/mtdchar.c   | 2 +-
 drivers/mtd/mtdswap.c   | 3 +--
 drivers/mtd/rfd_ftl.c   | 3 +--
 drivers/mtd/ubi/kapi.c  | 4 +---
 fs/jffs2/super.c        | 4 +---
 fs/logfs/dev_mtd.c      | 3 +--
 include/linux/mtd/mtd.h | 3 ++-
 9 files changed, 11 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index c9c90299c9e2..19d637266fcd 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -650,8 +650,7 @@ static int reclaim_block(partition_t *part)
 	    if (queued) {
 		pr_debug("ftl_cs: waiting for transfer "
 		      "unit to be prepared...\n");
-		if (part->mbd.mtd->sync)
-			mtd_sync(part->mbd.mtd);
+		mtd_sync(part->mbd.mtd);
 	    } else {
 		static int ne = 0;
 		if (++ne < 5)
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index 496e1a6e8029..af6591237b9b 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -322,8 +322,7 @@ static int mtdblock_release(struct mtd_blktrans_dev *mbd)
 
 	if (!--mtdblk->count) {
 		/* It was the last usage. Free the cache */
-		if (mbd->mtd->sync)
-			mtd_sync(mbd->mtd);
+		mtd_sync(mbd->mtd);
 		vfree(mtdblk->cache_data);
 	}
 
@@ -341,9 +340,7 @@ static int mtdblock_flush(struct mtd_blktrans_dev *dev)
 	mutex_lock(&mtdblk->cache_mutex);
 	write_cached_data(mtdblk);
 	mutex_unlock(&mtdblk->cache_mutex);
-
-	if (dev->mtd->sync)
-		mtd_sync(dev->mtd);
+	mtd_sync(dev->mtd);
 	return 0;
 }
 
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 2020a169ed9c..23a51104aeb5 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -154,7 +154,7 @@ static int mtdchar_close(struct inode *inode, struct file *file)
 	pr_debug("MTD_close\n");
 
 	/* Only sync if opened RW */
-	if ((file->f_mode & FMODE_WRITE) && mtd->sync)
+	if ((file->f_mode & FMODE_WRITE))
 		mtd_sync(mtd);
 
 	iput(mfi->ino);
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 4441c08b082d..fe4426c1c736 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -1047,8 +1047,7 @@ static int mtdswap_flush(struct mtd_blktrans_dev *dev)
 {
 	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
 
-	if (d->mtd->sync)
-		mtd_sync(d->mtd);
+	mtd_sync(d->mtd);
 	return 0;
 }
 
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index 5426d42cdea7..233b946e5d66 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -448,8 +448,7 @@ static int reclaim_block(struct partition *part, u_long *old_sector)
 	int rc;
 
 	/* we have a race if sync doesn't exist */
-	if (part->mbd.mtd->sync)
-		mtd_sync(part->mbd.mtd);
+	mtd_sync(part->mbd.mtd);
 
 	score = 0x7fffffff; /* MAX_INT */
 	best_block = -1;
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index 9f265cc1a0d3..9fdb35367fe0 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -714,9 +714,7 @@ int ubi_sync(int ubi_num)
 	if (!ubi)
 		return -ENODEV;
 
-	if (ubi->mtd->sync)
-		mtd_sync(ubi->mtd);
-
+	mtd_sync(ubi->mtd);
 	ubi_put_device(ubi);
 	return 0;
 }
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index e78bf3cd1b73..5863a369d929 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -336,9 +336,7 @@ static void jffs2_put_super (struct super_block *sb)
 	jffs2_flash_cleanup(c);
 	kfree(c->inocache_list);
 	jffs2_clear_xattr_subsystem(c);
-	if (c->mtd->sync)
-		mtd_sync(c->mtd);
-
+	mtd_sync(c->mtd);
 	D1(printk(KERN_DEBUG "jffs2_put_super returning\n"));
 }
 
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 136c7360a9b6..3f465882ee70 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -119,8 +119,7 @@ static void logfs_mtd_sync(struct super_block *sb)
 {
 	struct mtd_info *mtd = logfs_super(sb)->s_mtd;
 
-	if (mtd->sync)
-		mtd_sync(mtd);
+	mtd_sync(mtd);
 }
 
 static int logfs_mtd_readpage(void *_sb, struct page *page)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index a58ecf4d1f80..305f12b940f4 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -399,7 +399,8 @@ int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
 
 static inline void mtd_sync(struct mtd_info *mtd)
 {
-	mtd->sync(mtd);
+	if (mtd->sync)
+		mtd->sync(mtd);
 }
 
 /* Chip-supported device locking */
-- 
cgit v1.2.3


From 381345652fca688aeaa967c231e5075cf68d05b6 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 17:00:35 +0200
Subject: mtd: do not use mtd->lock, unlock and is_locked directly

Instead, call the corresponding MTD API function which will return
'-EOPNOTSUPP' if the operation is not supported.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/scb2_flash.c |  3 +--
 drivers/mtd/mtdchar.c         | 15 +++------------
 drivers/mtd/mtdconcat.c       | 18 ++++++------------
 drivers/mtd/mtdcore.c         |  6 +++---
 include/linux/mtd/mtd.h       |  6 ++++++
 5 files changed, 19 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/maps/scb2_flash.c b/drivers/mtd/maps/scb2_flash.c
index 01af34778de3..934a72c80078 100644
--- a/drivers/mtd/maps/scb2_flash.c
+++ b/drivers/mtd/maps/scb2_flash.c
@@ -204,8 +204,7 @@ scb2_flash_remove(struct pci_dev *dev)
 		return;
 
 	/* disable flash writes */
-	if (scb2_mtd->lock)
-		mtd_lock(scb2_mtd, 0, scb2_mtd->size);
+	mtd_lock(scb2_mtd, 0, scb2_mtd->size);
 
 	mtd_device_unregister(scb2_mtd);
 	map_destroy(scb2_mtd);
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 23a51104aeb5..92da621b1425 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -814,10 +814,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (copy_from_user(&einfo, argp, sizeof(einfo)))
 			return -EFAULT;
 
-		if (!mtd->lock)
-			ret = -EOPNOTSUPP;
-		else
-			ret = mtd_lock(mtd, einfo.start, einfo.length);
+		ret = mtd_lock(mtd, einfo.start, einfo.length);
 		break;
 	}
 
@@ -828,10 +825,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (copy_from_user(&einfo, argp, sizeof(einfo)))
 			return -EFAULT;
 
-		if (!mtd->unlock)
-			ret = -EOPNOTSUPP;
-		else
-			ret = mtd_unlock(mtd, einfo.start, einfo.length);
+		ret = mtd_unlock(mtd, einfo.start, einfo.length);
 		break;
 	}
 
@@ -842,10 +836,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 		if (copy_from_user(&einfo, argp, sizeof(einfo)))
 			return -EFAULT;
 
-		if (!mtd->is_locked)
-			ret = -EOPNOTSUPP;
-		else
-			ret = mtd_is_locked(mtd, einfo.start, einfo.length);
+		ret = mtd_is_locked(mtd, einfo.start, einfo.length);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index 9119f76f87ff..aaafb5e18765 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -555,12 +555,9 @@ static int concat_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 		else
 			size = len;
 
-		if (subdev->lock) {
-			err = mtd_lock(subdev, ofs, size);
-			if (err)
-				break;
-		} else
-			err = -EOPNOTSUPP;
+		err = mtd_lock(subdev, ofs, size);
+		if (err)
+			break;
 
 		len -= size;
 		if (len == 0)
@@ -595,12 +592,9 @@ static int concat_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 		else
 			size = len;
 
-		if (subdev->unlock) {
-			err = mtd_unlock(subdev, ofs, size);
-			if (err)
-				break;
-		} else
-			err = -EOPNOTSUPP;
+		err = mtd_unlock(subdev, ofs, size);
+		if (err)
+			break;
 
 		len -= size;
 		if (len == 0)
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 4d0f3e557bd1..66494ee5355a 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -339,9 +339,9 @@ int add_mtd_device(struct mtd_info *mtd)
 	mtd->writesize_mask = (1 << mtd->writesize_shift) - 1;
 
 	/* Some chips always power up locked. Unlock them now */
-	if ((mtd->flags & MTD_WRITEABLE)
-	    && (mtd->flags & MTD_POWERUP_LOCK) && mtd->unlock) {
-		if (mtd_unlock(mtd, 0, mtd->size))
+	if ((mtd->flags & MTD_WRITEABLE) && (mtd->flags & MTD_POWERUP_LOCK)) {
+		error = mtd_unlock(mtd, 0, mtd->size);
+		if (error && error != -EOPNOTSUPP)
 			printk(KERN_WARNING
 			       "%s: unlock failed, writes may not work\n",
 			       mtd->name);
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 305f12b940f4..6c91ba59c229 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -406,16 +406,22 @@ static inline void mtd_sync(struct mtd_info *mtd)
 /* Chip-supported device locking */
 static inline int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
+	if (!mtd->lock)
+		return -EOPNOTSUPP;
 	return mtd->lock(mtd, ofs, len);
 }
 
 static inline int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
+	if (!mtd->unlock)
+		return -EOPNOTSUPP;
 	return mtd->unlock(mtd, ofs, len);
 }
 
 static inline int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
+	if (!mtd->is_locked)
+		return -EOPNOTSUPP;
 	return mtd->is_locked(mtd, ofs, len);
 }
 
-- 
cgit v1.2.3


From 079c985e7a6f4ce60f931cebfdd5ee3c38347e31 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Fri, 30 Dec 2011 17:15:59 +0200
Subject: mtd: do not use mtd->suspend and mtd->resume directly

Just call the 'mtd_suspend()' and 'mtd_resume()' - they will do nothing
if the operation is not defined.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/maps/physmap.c        | 5 ++---
 drivers/mtd/maps/rbtx4939-flash.c | 5 ++---
 drivers/mtd/mtdcore.c             | 5 +----
 include/linux/mtd/mtd.h           | 5 ++++-
 4 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index d94cc62186c1..abc562653b31 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c
@@ -190,9 +190,8 @@ static void physmap_flash_shutdown(struct platform_device *dev)
 	int i;
 
 	for (i = 0; i < MAX_RESOURCES && info->mtd[i]; i++)
-		if (info->mtd[i]->suspend && info->mtd[i]->resume)
-			if (mtd_suspend(info->mtd[i]) == 0)
-				mtd_resume(info->mtd[i]);
+		if (mtd_suspend(info->mtd[i]) == 0)
+			mtd_resume(info->mtd[i]);
 }
 #else
 #define physmap_flash_shutdown NULL
diff --git a/drivers/mtd/maps/rbtx4939-flash.c b/drivers/mtd/maps/rbtx4939-flash.c
index 717628312040..3da63fc6f16e 100644
--- a/drivers/mtd/maps/rbtx4939-flash.c
+++ b/drivers/mtd/maps/rbtx4939-flash.c
@@ -119,9 +119,8 @@ static void rbtx4939_flash_shutdown(struct platform_device *dev)
 {
 	struct rbtx4939_flash_info *info = platform_get_drvdata(dev);
 
-	if (info->mtd->suspend && info->mtd->resume)
-		if (mtd_suspend(info->mtd) == 0)
-			mtd_resume(info->mtd);
+	if (mtd_suspend(info->mtd) == 0)
+		mtd_resume(info->mtd);
 }
 #else
 #define rbtx4939_flash_shutdown NULL
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 66494ee5355a..6ae9ca01388b 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -119,10 +119,7 @@ static int mtd_cls_suspend(struct device *dev, pm_message_t state)
 {
 	struct mtd_info *mtd = dev_get_drvdata(dev);
 
-	if (mtd && mtd->suspend)
-		return mtd_suspend(mtd);
-	else
-		return 0;
+	return mtd_suspend(mtd);
 }
 
 static int mtd_cls_resume(struct device *dev)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 6c91ba59c229..089370758fc9 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -427,12 +427,15 @@ static inline int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 
 static inline int mtd_suspend(struct mtd_info *mtd)
 {
+	if (!mtd->suspend)
+		return -EOPNOTSUPP;
 	return mtd->suspend(mtd);
 }
 
 static inline void mtd_resume(struct mtd_info *mtd)
 {
-	mtd->resume(mtd);
+	if (mtd->resume)
+		mtd->resume(mtd);
 }
 
 static inline int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
-- 
cgit v1.2.3


From 8f461a730242c528ca221948edceca49266a3ffb Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Mon, 2 Jan 2012 13:48:54 +0200
Subject: mtd: introduce mtd_can_have_bb helper

This patch introduces new 'mtd_can_have_bb()' helper function which checks
whether the flash can have bad eraseblocks. Then it changes all the
direct 'mtd->block_isbad' use cases with 'mtd_can_have_bb()'.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c               | 5 +----
 drivers/mtd/mtdconcat.c             | 2 +-
 drivers/mtd/mtdoops.c               | 4 ++--
 drivers/mtd/mtdswap.c               | 4 ++--
 drivers/mtd/nftlcore.c              | 2 +-
 drivers/mtd/redboot.c               | 4 ++--
 drivers/mtd/tests/mtd_readtest.c    | 3 +--
 drivers/mtd/tests/mtd_speedtest.c   | 3 +--
 drivers/mtd/tests/mtd_stresstest.c  | 3 +--
 drivers/mtd/tests/mtd_torturetest.c | 2 +-
 drivers/mtd/ubi/build.c             | 2 +-
 include/linux/mtd/mtd.h             | 7 +++++++
 12 files changed, 21 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 92da621b1425..64efcbf087e9 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -867,10 +867,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 
 		if (copy_from_user(&offs, argp, sizeof(loff_t)))
 			return -EFAULT;
-		if (!mtd->block_isbad)
-			ret = -EOPNOTSUPP;
-		else
-			return mtd_block_isbad(mtd, offs);
+		return mtd_block_isbad(mtd, offs);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index aaafb5e18765..fbf3cb124a93 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -647,7 +647,7 @@ static int concat_block_isbad(struct mtd_info *mtd, loff_t ofs)
 	struct mtd_concat *concat = CONCAT(mtd);
 	int i, res = 0;
 
-	if (!concat->subdev[0]->block_isbad)
+	if (!mtd_can_have_bb(concat->subdev[0]))
 		return res;
 
 	if (ofs > mtd->size)
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index c8540b8a7fc6..a4c8f67560e0 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -169,7 +169,7 @@ static void mtdoops_workfunc_erase(struct work_struct *work)
 			cxt->nextpage = 0;
 	}
 
-	while (mtd->block_isbad) {
+	while (mtd_can_have_bb(mtd)) {
 		ret = mtd_block_isbad(mtd, cxt->nextpage * record_size);
 		if (!ret)
 			break;
@@ -257,7 +257,7 @@ static void find_next_position(struct mtdoops_context *cxt)
 	size_t retlen;
 
 	for (page = 0; page < cxt->oops_pages; page++) {
-		if (mtd->block_isbad &&
+		if (mtd_can_have_bb(mtd) &&
 		    mtd_block_isbad(mtd, page * record_size))
 			continue;
 		/* Assume the page is used */
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index fe4426c1c736..3fc8cb2756c0 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -343,7 +343,7 @@ static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
 	offset = mtdswap_eb_offset(d, eb);
 
 	/* Check first if the block is bad. */
-	if (d->mtd->block_isbad && mtd_block_isbad(d->mtd, offset))
+	if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset))
 		return MTDSWAP_SCANNED_BAD;
 
 	ops.ooblen = 2 * d->mtd->ecclayout->oobavail;
@@ -1058,7 +1058,7 @@ static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
 
 	badcnt = 0;
 
-	if (mtd->block_isbad)
+	if (mtd_can_have_bb(mtd))
 		for (offset = 0; offset < size; offset += mtd->erasesize)
 			if (mtd_block_isbad(mtd, offset))
 				badcnt++;
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index 8847e60ad167..a75382aff5f6 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -56,7 +56,7 @@ static void nftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 	if (memcmp(mtd->name, "DiskOnChip", 10))
 		return;
 
-	if (!mtd->block_isbad) {
+	if (!mtd_can_have_bb(mtd)) {
 		printk(KERN_ERR
 "NFTL no longer supports the old DiskOnChip drivers loaded via docprobe.\n"
 "Please use the new diskonchip driver under the NAND subsystem.\n");
diff --git a/drivers/mtd/redboot.c b/drivers/mtd/redboot.c
index 09bb81ea3a7e..48970c14beff 100644
--- a/drivers/mtd/redboot.c
+++ b/drivers/mtd/redboot.c
@@ -78,7 +78,7 @@ static int parse_redboot_partitions(struct mtd_info *master,
 
 	if ( directory < 0 ) {
 		offset = master->size + directory * master->erasesize;
-		while (master->block_isbad && 
+		while (mtd_can_have_bb(master) &&
 		       mtd_block_isbad(master, offset)) {
 			if (!offset) {
 			nogood:
@@ -89,7 +89,7 @@ static int parse_redboot_partitions(struct mtd_info *master,
 		}
 	} else {
 		offset = directory * master->erasesize;
-		while (master->block_isbad && 
+		while (mtd_can_have_bb(master) &&
 		       mtd_block_isbad(master, offset)) {
 			offset += master->erasesize;
 			if (offset == master->size)
diff --git a/drivers/mtd/tests/mtd_readtest.c b/drivers/mtd/tests/mtd_readtest.c
index 4228eb4e54c7..121aba189cec 100644
--- a/drivers/mtd/tests/mtd_readtest.c
+++ b/drivers/mtd/tests/mtd_readtest.c
@@ -148,8 +148,7 @@ static int scan_for_bad_eraseblocks(void)
 		return -ENOMEM;
 	}
 
-	/* NOR flash does not implement block_isbad */
-	if (mtd->block_isbad == NULL)
+	if (!mtd_can_have_bb(mtd))
 		return 0;
 
 	printk(PRINT_PREF "scanning for bad eraseblocks\n");
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/mtd_speedtest.c
index 4d2ed5c0807d..2aec4f3b72be 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/mtd_speedtest.c
@@ -336,8 +336,7 @@ static int scan_for_bad_eraseblocks(void)
 		return -ENOMEM;
 	}
 
-	/* NOR flash does not implement block_isbad */
-	if (mtd->block_isbad == NULL)
+	if (!mtd_can_have_bb(mtd))
 		goto out;
 
 	printk(PRINT_PREF "scanning for bad eraseblocks\n");
diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c
index 399aa2bf220d..7b33f22d0b58 100644
--- a/drivers/mtd/tests/mtd_stresstest.c
+++ b/drivers/mtd/tests/mtd_stresstest.c
@@ -227,8 +227,7 @@ static int scan_for_bad_eraseblocks(void)
 		return -ENOMEM;
 	}
 
-	/* NOR flash does not implement block_isbad */
-	if (mtd->block_isbad == NULL)
+	if (!mtd_can_have_bb(mtd))
 		return 0;
 
 	printk(PRINT_PREF "scanning for bad eraseblocks\n");
diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c
index 557105f2ead3..b65861bc7b8e 100644
--- a/drivers/mtd/tests/mtd_torturetest.c
+++ b/drivers/mtd/tests/mtd_torturetest.c
@@ -290,7 +290,7 @@ static int __init tort_init(void)
 	 * Check if there is a bad eraseblock among those we are going to test.
 	 */
 	memset(&bad_ebs[0], 0, sizeof(int) * ebcnt);
-	if (mtd->block_isbad) {
+	if (mtd_can_have_bb(mtd)) {
 		for (i = eb; i < eb + ebcnt; i++) {
 			err = mtd_block_isbad(mtd, (loff_t)i * mtd->erasesize);
 
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 6c3fb5ab20f5..115749f20f9e 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -664,7 +664,7 @@ static int io_init(struct ubi_device *ubi)
 	ubi->peb_count  = mtd_div_by_eb(ubi->mtd->size, ubi->mtd);
 	ubi->flash_size = ubi->mtd->size;
 
-	if (ubi->mtd->block_isbad && ubi->mtd->block_markbad)
+	if (mtd_can_have_bb(ubi->mtd))
 		ubi->bad_allowed = 1;
 
 	if (ubi->mtd->type == MTD_NORFLASH) {
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 089370758fc9..7e35755f6931 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -440,6 +440,8 @@ static inline void mtd_resume(struct mtd_info *mtd)
 
 static inline int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
 {
+	if (!mtd->block_isbad)
+		return -EOPNOTSUPP;
 	return mtd->block_isbad(mtd, ofs);
 }
 
@@ -483,6 +485,11 @@ static inline int mtd_has_oob(const struct mtd_info *mtd)
 	return mtd->read_oob && mtd->write_oob;
 }
 
+static inline int mtd_can_have_bb(const struct mtd_info *mtd)
+{
+	return !!mtd->block_isbad;
+}
+
 	/* Kernel-side ioctl definitions */
 
 struct mtd_partition;
-- 
cgit v1.2.3


From 800ffd3496987e91f599a135060ef49731e045ac Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Mon, 2 Jan 2012 13:59:12 +0200
Subject: mtd: do not use mtd->block_markbad directly

Instead, use the new 'mtd_can_have_bb()', or just rely on 'mtd_block_markbad()'
return code, which will be -EOPNOTSUPP if bad blocks are not supported.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdchar.c   | 5 +----
 drivers/mtd/mtdconcat.c | 2 +-
 drivers/mtd/mtdoops.c   | 2 +-
 drivers/mtd/mtdswap.c   | 2 +-
 fs/jffs2/wbuf.c         | 3 ---
 include/linux/mtd/mtd.h | 2 ++
 6 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 64efcbf087e9..50c6a1e7f675 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -877,10 +877,7 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg)
 
 		if (copy_from_user(&offs, argp, sizeof(loff_t)))
 			return -EFAULT;
-		if (!mtd->block_markbad)
-			ret = -EOPNOTSUPP;
-		else
-			return mtd_block_markbad(mtd, offs);
+		return mtd_block_markbad(mtd, offs);
 		break;
 	}
 
diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c
index fbf3cb124a93..1ed5103b219b 100644
--- a/drivers/mtd/mtdconcat.c
+++ b/drivers/mtd/mtdconcat.c
@@ -673,7 +673,7 @@ static int concat_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	struct mtd_concat *concat = CONCAT(mtd);
 	int i, err = -EINVAL;
 
-	if (!concat->subdev[0]->block_markbad)
+	if (!mtd_can_have_bb(concat->subdev[0]))
 		return 0;
 
 	if (ofs > mtd->size)
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index a4c8f67560e0..db8e8272d69b 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -199,7 +199,7 @@ badblock:
 		return;
 	}
 
-	if (mtd->block_markbad && ret == -EIO) {
+	if (mtd_can_have_bb(mtd) && ret == -EIO) {
 		ret = mtd_block_markbad(mtd, cxt->nextpage * record_size);
 		if (ret < 0) {
 			printk(KERN_ERR "mtdoops: block_markbad failed, aborting\n");
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 3fc8cb2756c0..c92f0f6bc130 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -274,7 +274,7 @@ static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb)
 	eb->root = NULL;
 
 	/* badblocks not supported */
-	if (!d->mtd->block_markbad)
+	if (!mtd_can_have_bb(d->mtd))
 		return 1;
 
 	offset = mtdswap_eb_offset(d, eb);
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index fd96b757433f..30e8f47e8a23 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1130,9 +1130,6 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock *
 	if( ++jeb->bad_count < MAX_ERASE_FAILURES)
 		return 0;
 
-	if (!c->mtd->block_markbad)
-		return 1; // What else can we do?
-
 	printk(KERN_WARNING "JFFS2: marking eraseblock at %08x\n as bad", bad_offset);
 	ret = mtd_block_markbad(c->mtd, bad_offset);
 
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 7e35755f6931..1a81fde8f333 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -447,6 +447,8 @@ static inline int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs)
 
 static inline int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs)
 {
+	if (!mtd->block_markbad)
+		return -EOPNOTSUPP;
 	return mtd->block_markbad(mtd, ofs);
 }
 
-- 
cgit v1.2.3


From eaf5f9073533cde21c7121c136f1c3f072d9cf59 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Tue, 10 Jan 2012 18:22:25 +0100
Subject: fix shrink_dcache_parent() livelock

Two (or more) concurrent calls of shrink_dcache_parent() on the same dentry may
cause shrink_dcache_parent() to loop forever.

Here's what appears to happen:

1 - CPU0: select_parent(P) finds C and puts it on dispose list, returns 1

2 - CPU1: select_parent(P) locks P->d_lock

3 - CPU0: shrink_dentry_list() locks C->d_lock
   dentry_kill(C) tries to lock P->d_lock but fails, unlocks C->d_lock

4 - CPU1: select_parent(P) locks C->d_lock,
         moves C from dispose list being processed on CPU0 to the new
         dispose list, returns 1

5 - CPU0: shrink_dentry_list() finds dispose list empty, returns

6 - Goto 2 with CPU0 and CPU1 switched

Basically select_parent() steals the dentry from shrink_dentry_list() and thinks
it found a new one, causing shrink_dentry_list() to think it's making progress
and loop over and over.

One way to trigger this is to make udev call stat() on the sysfs file while it
is going away.

Having a file in /lib/udev/rules.d/ with only this one rule seems to do the trick:

ATTR{vendor}=="0x8086", ATTR{device}=="0x10ca", ENV{PCI_SLOT_NAME}="%k", ENV{MATCHADDR}="$attr{address}", RUN+="/bin/true"

Then execute the following loop:

while true; do
        echo -bond0 > /sys/class/net/bonding_masters
        echo +bond0 > /sys/class/net/bonding_masters
        echo -bond1 > /sys/class/net/bonding_masters
        echo +bond1 > /sys/class/net/bonding_masters
done

One fix would be to check all callers and prevent concurrent calls to
shrink_dcache_parent().  But I think a better solution is to stop the
stealing behavior.

This patch adds a new dentry flag that is set when the dentry is added to the
dispose list.  The flag is cleared in dentry_lru_del() in case the dentry gets a
new reference just before being pruned.

If the dentry has this flag, select_parent() will skip it and let
shrink_dentry_list() retry pruning it.  With select_parent() skipping those
dentries there will not be the appearance of progress (new dentries found) when
there is none, hence shrink_dcache_parent() will not loop forever.

The flag is also set in prune_dcache_sb() for consistency, as suggested by
Linus.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 15 +++++++++++----
 include/linux/dcache.h |  1 +
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 3c6d3113a255..616fedff011a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -243,6 +243,7 @@ static void dentry_lru_add(struct dentry *dentry)
 static void __dentry_lru_del(struct dentry *dentry)
 {
 	list_del_init(&dentry->d_lru);
+	dentry->d_flags &= ~DCACHE_SHRINK_LIST;
 	dentry->d_sb->s_nr_dentry_unused--;
 	dentry_stat.nr_unused--;
 }
@@ -806,6 +807,7 @@ relock:
 			spin_unlock(&dentry->d_lock);
 		} else {
 			list_move_tail(&dentry->d_lru, &tmp);
+			dentry->d_flags |= DCACHE_SHRINK_LIST;
 			spin_unlock(&dentry->d_lock);
 			if (!--count)
 				break;
@@ -1097,14 +1099,19 @@ resume:
 
 		/*
 		 * move only zero ref count dentries to the dispose list.
+		 *
+		 * Those which are presently on the shrink list, being processed
+		 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
+		 * loop in shrink_dcache_parent() might not make any progress
+		 * and loop forever.
 		 */
-		if (!dentry->d_count) {
+		if (dentry->d_count) {
+			dentry_lru_del(dentry);
+		} else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
 			dentry_lru_move_list(dentry, dispose);
+			dentry->d_flags |= DCACHE_SHRINK_LIST;
 			found++;
-		} else {
-			dentry_lru_del(dentry);
 		}
-
 		/*
 		 * We can return to the caller if we have found some (this
 		 * ensures forward progress). We'll be coming back to find
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index a47bda5f76db..31f73220e7d7 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -203,6 +203,7 @@ struct dentry_operations {
 
 #define DCACHE_CANT_MOUNT	0x0100
 #define DCACHE_GENOCIDE		0x0200
+#define DCACHE_SHRINK_LIST	0x0400
 
 #define DCACHE_NFSFS_RENAMED	0x1000
      /* this dentry has been "silly renamed" and has to be deleted on the last
-- 
cgit v1.2.3


From 1edf223485c42c99655dcd001db1e46ad5e5d2d7 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 10 Jan 2012 15:06:57 -0800
Subject: mm/page-writeback.c: make determine_dirtyable_memory static again

The tracing ring-buffer used this function briefly, but not anymore.
Make it local to the writeback code again.

Also, move the function so that no forward declaration needs to be
reintroduced.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/writeback.h |   2 -
 mm/page-writeback.c       | 122 +++++++++++++++++++++++-----------------------
 2 files changed, 60 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a378c295851f..34a005515fef 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -138,8 +138,6 @@ extern int vm_highmem_is_dirtyable;
 extern int block_dump;
 extern int laptop_mode;
 
-extern unsigned long determine_dirtyable_memory(void);
-
 extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 8616ef3025a4..c081bf62202b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -129,6 +129,66 @@ unsigned long global_dirty_limit;
  */
 static struct prop_descriptor vm_completions;
 
+/*
+ * Work out the current dirty-memory clamping and background writeout
+ * thresholds.
+ *
+ * The main aim here is to lower them aggressively if there is a lot of mapped
+ * memory around.  To avoid stressing page reclaim with lots of unreclaimable
+ * pages.  It is better to clamp down on writers than to start swapping, and
+ * performing lots of scanning.
+ *
+ * We only allow 1/2 of the currently-unmapped memory to be dirtied.
+ *
+ * We don't permit the clamping level to fall below 5% - that is getting rather
+ * excessive.
+ *
+ * We make sure that the background writeout level is below the adjusted
+ * clamping level.
+ */
+static unsigned long highmem_dirtyable_memory(unsigned long total)
+{
+#ifdef CONFIG_HIGHMEM
+	int node;
+	unsigned long x = 0;
+
+	for_each_node_state(node, N_HIGH_MEMORY) {
+		struct zone *z =
+			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
+
+		x += zone_page_state(z, NR_FREE_PAGES) +
+		     zone_reclaimable_pages(z);
+	}
+	/*
+	 * Make sure that the number of highmem pages is never larger
+	 * than the number of the total dirtyable memory. This can only
+	 * occur in very strange VM situations but we want to make sure
+	 * that this does not occur.
+	 */
+	return min(x, total);
+#else
+	return 0;
+#endif
+}
+
+/**
+ * determine_dirtyable_memory - amount of memory that may be used
+ *
+ * Returns the number of pages that can currently be freed and used
+ * by the kernel for direct mappings.
+ */
+static unsigned long determine_dirtyable_memory(void)
+{
+	unsigned long x;
+
+	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
+
+	if (!vm_highmem_is_dirtyable)
+		x -= highmem_dirtyable_memory(x);
+
+	return x + 1;	/* Ensure that we never return 0 */
+}
+
 /*
  * couple the period to the dirty_ratio:
  *
@@ -196,7 +256,6 @@ int dirty_ratio_handler(struct ctl_table *table, int write,
 	return ret;
 }
 
-
 int dirty_bytes_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
@@ -291,67 +350,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 }
 EXPORT_SYMBOL(bdi_set_max_ratio);
 
-/*
- * Work out the current dirty-memory clamping and background writeout
- * thresholds.
- *
- * The main aim here is to lower them aggressively if there is a lot of mapped
- * memory around.  To avoid stressing page reclaim with lots of unreclaimable
- * pages.  It is better to clamp down on writers than to start swapping, and
- * performing lots of scanning.
- *
- * We only allow 1/2 of the currently-unmapped memory to be dirtied.
- *
- * We don't permit the clamping level to fall below 5% - that is getting rather
- * excessive.
- *
- * We make sure that the background writeout level is below the adjusted
- * clamping level.
- */
-
-static unsigned long highmem_dirtyable_memory(unsigned long total)
-{
-#ifdef CONFIG_HIGHMEM
-	int node;
-	unsigned long x = 0;
-
-	for_each_node_state(node, N_HIGH_MEMORY) {
-		struct zone *z =
-			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
-
-		x += zone_page_state(z, NR_FREE_PAGES) +
-		     zone_reclaimable_pages(z);
-	}
-	/*
-	 * Make sure that the number of highmem pages is never larger
-	 * than the number of the total dirtyable memory. This can only
-	 * occur in very strange VM situations but we want to make sure
-	 * that this does not occur.
-	 */
-	return min(x, total);
-#else
-	return 0;
-#endif
-}
-
-/**
- * determine_dirtyable_memory - amount of memory that may be used
- *
- * Returns the numebr of pages that can currently be freed and used
- * by the kernel for direct mappings.
- */
-unsigned long determine_dirtyable_memory(void)
-{
-	unsigned long x;
-
-	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
-
-	if (!vm_highmem_is_dirtyable)
-		x -= highmem_dirtyable_memory(x);
-
-	return x + 1;	/* Ensure that we never return 0 */
-}
-
 static unsigned long dirty_freerun_ceiling(unsigned long thresh,
 					   unsigned long bg_thresh)
 {
-- 
cgit v1.2.3


From cc59850ef940e4ee6a765d28b439b9bafe07cf63 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 10 Jan 2012 15:07:04 -0800
Subject: mm: add free_hot_cold_page_list() helper

This patch adds helper free_hot_cold_page_list() to free list of 0-order
pages.  It frees pages directly from list without temporary page-vector.
It also calls trace_mm_pagevec_free() to simulate pagevec_free()
behaviour.

bloat-o-meter:

add/remove: 1/1 grow/shrink: 1/3 up/down: 267/-295 (-28)
function                                     old     new   delta
free_hot_cold_page_list                        -     264    +264
get_page_from_freelist                      2129    2132      +3
__pagevec_free                               243     239      -4
split_free_page                              380     373      -7
release_pages                                606     510     -96
free_page_list                               188       -    -188

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h |  1 +
 mm/page_alloc.c     | 13 +++++++++++++
 mm/swap.c           | 14 +++-----------
 mm/vmscan.c         | 20 +-------------------
 4 files changed, 18 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 3a76faf6a3ee..656295865d58 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -358,6 +358,7 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
 extern void __free_pages(struct page *page, unsigned int order);
 extern void free_pages(unsigned long addr, unsigned int order);
 extern void free_hot_cold_page(struct page *page, int cold);
+extern void free_hot_cold_page_list(struct list_head *list, int cold);
 
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7990ca154d1b..cd0c95c6cc9e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1188,6 +1188,19 @@ out:
 	local_irq_restore(flags);
 }
 
+/*
+ * Free a list of 0-order pages
+ */
+void free_hot_cold_page_list(struct list_head *list, int cold)
+{
+	struct page *page, *next;
+
+	list_for_each_entry_safe(page, next, list, lru) {
+		trace_mm_pagevec_free(page, cold);
+		free_hot_cold_page(page, cold);
+	}
+}
+
 /*
  * split_page takes a non-compound higher-order page, and splits it into
  * n (1<<order) sub-pages: page[0..n]
diff --git a/mm/swap.c b/mm/swap.c
index a91caf754d9b..67a09a633a09 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -585,11 +585,10 @@ int lru_add_drain_all(void)
 void release_pages(struct page **pages, int nr, int cold)
 {
 	int i;
-	struct pagevec pages_to_free;
+	LIST_HEAD(pages_to_free);
 	struct zone *zone = NULL;
 	unsigned long uninitialized_var(flags);
 
-	pagevec_init(&pages_to_free, cold);
 	for (i = 0; i < nr; i++) {
 		struct page *page = pages[i];
 
@@ -620,19 +619,12 @@ void release_pages(struct page **pages, int nr, int cold)
 			del_page_from_lru(zone, page);
 		}
 
-		if (!pagevec_add(&pages_to_free, page)) {
-			if (zone) {
-				spin_unlock_irqrestore(&zone->lru_lock, flags);
-				zone = NULL;
-			}
-			__pagevec_free(&pages_to_free);
-			pagevec_reinit(&pages_to_free);
-  		}
+		list_add(&page->lru, &pages_to_free);
 	}
 	if (zone)
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
-	pagevec_free(&pages_to_free);
+	free_hot_cold_page_list(&pages_to_free, cold);
 }
 EXPORT_SYMBOL(release_pages);
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 753a2dc300b9..3d571df41c79 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -734,24 +734,6 @@ static enum page_references page_check_references(struct page *page,
 	return PAGEREF_RECLAIM;
 }
 
-static noinline_for_stack void free_page_list(struct list_head *free_pages)
-{
-	struct pagevec freed_pvec;
-	struct page *page, *tmp;
-
-	pagevec_init(&freed_pvec, 1);
-
-	list_for_each_entry_safe(page, tmp, free_pages, lru) {
-		list_del(&page->lru);
-		if (!pagevec_add(&freed_pvec, page)) {
-			__pagevec_free(&freed_pvec);
-			pagevec_reinit(&freed_pvec);
-		}
-	}
-
-	pagevec_free(&freed_pvec);
-}
-
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -1015,7 +997,7 @@ keep_lumpy:
 	if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc))
 		zone_set_flag(zone, ZONE_CONGESTED);
 
-	free_page_list(&free_pages);
+	free_hot_cold_page_list(&free_pages, 1);
 
 	list_splice(&ret_pages, page_list);
 	count_vm_events(PGACTIVATE, pgactivate);
-- 
cgit v1.2.3


From da066ad3570b88e7dee82e76a06ee9a7adffcf0d Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 10 Jan 2012 15:07:06 -0800
Subject: mm: remove unused pagevec_free

It is not exported and now nobody uses it.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagevec.h |  7 -------
 mm/page_alloc.c         | 10 ----------
 2 files changed, 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index bab82f4c571c..ed17024d2ebe 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -21,7 +21,6 @@ struct pagevec {
 };
 
 void __pagevec_release(struct pagevec *pvec);
-void __pagevec_free(struct pagevec *pvec);
 void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
 void pagevec_strip(struct pagevec *pvec);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
@@ -67,12 +66,6 @@ static inline void pagevec_release(struct pagevec *pvec)
 		__pagevec_release(pvec);
 }
 
-static inline void pagevec_free(struct pagevec *pvec)
-{
-	if (pagevec_count(pvec))
-		__pagevec_free(pvec);
-}
-
 static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
 {
 	____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cd0c95c6cc9e..6c77efbca5bc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2319,16 +2319,6 @@ unsigned long get_zeroed_page(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(get_zeroed_page);
 
-void __pagevec_free(struct pagevec *pvec)
-{
-	int i = pagevec_count(pvec);
-
-	while (--i >= 0) {
-		trace_mm_pagevec_free(pvec->pages[i], pvec->cold);
-		free_hot_cold_page(pvec->pages[i], pvec->cold);
-	}
-}
-
 void __free_pages(struct page *page, unsigned int order)
 {
 	if (put_page_testzero(page)) {
-- 
cgit v1.2.3


From f90ac3982a78d36f894824636beeef13361d7c59 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 10 Jan 2012 15:07:15 -0800
Subject: mm: avoid livelock on !__GFP_FS allocations

Colin Cross reported;

  Under the following conditions, __alloc_pages_slowpath can loop forever:
  gfp_mask & __GFP_WAIT is true
  gfp_mask & __GFP_FS is false
  reclaim and compaction make no progress
  order <= PAGE_ALLOC_COSTLY_ORDER

  These conditions happen very often during suspend and resume,
  when pm_restrict_gfp_mask() effectively converts all GFP_KERNEL
  allocations into __GFP_WAIT.

  The oom killer is not run because gfp_mask & __GFP_FS is false,
  but should_alloc_retry will always return true when order is less
  than PAGE_ALLOC_COSTLY_ORDER.

In his fix, he avoided retrying the allocation if reclaim made no progress
and __GFP_FS was not set.  The problem is that this would result in
GFP_NOIO allocations failing that previously succeeded which would be very
unfortunate.

The big difference between GFP_NOIO and suspend converting GFP_KERNEL to
behave like GFP_NOIO is that normally flushers will be cleaning pages and
kswapd reclaims pages allowing GFP_NOIO to succeed after a short delay.
The same does not necessarily apply during suspend as the storage device
may be suspended.

This patch special cases the suspend case to fail the page allocation if
reclaim cannot make progress and adds some documentation on how
gfp_allowed_mask is currently used.  Failing allocations like this may
cause suspend to abort but that is better than a livelock.

[mgorman@suse.de: Rework fix to be suspend specific]
[rientjes@google.com: Move suspended device check to should_alloc_retry]
Reported-by: Colin Cross <ccross@android.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 16 ++++++++++++++++
 mm/page_alloc.c     | 30 ++++++++++++++++++++++--------
 mm/swapfile.c       |  6 +++---
 3 files changed, 41 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 656295865d58..91812df1351a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -368,9 +368,25 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
 void drain_all_pages(void);
 void drain_local_pages(void *dummy);
 
+/*
+ * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
+ * GFP flags are used before interrupts are enabled. Once interrupts are
+ * enabled, it is set to __GFP_BITS_MASK while the system is running. During
+ * hibernation, it is used by PM to avoid I/O during memory allocation while
+ * devices are suspended.
+ */
 extern gfp_t gfp_allowed_mask;
 
 extern void pm_restrict_gfp_mask(void);
 extern void pm_restore_gfp_mask(void);
 
+#ifdef CONFIG_PM_SLEEP
+extern bool pm_suspended_storage(void);
+#else
+static inline bool pm_suspended_storage(void)
+{
+	return false;
+}
+#endif /* CONFIG_PM_SLEEP */
+
 #endif /* __LINUX_GFP_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 671e6c94fed7..3cba4b67203f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -127,6 +127,13 @@ void pm_restrict_gfp_mask(void)
 	saved_gfp_mask = gfp_allowed_mask;
 	gfp_allowed_mask &= ~GFP_IOFS;
 }
+
+bool pm_suspended_storage(void)
+{
+	if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS)
+		return false;
+	return true;
+}
 #endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -1786,12 +1793,25 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 
 static inline int
 should_alloc_retry(gfp_t gfp_mask, unsigned int order,
+				unsigned long did_some_progress,
 				unsigned long pages_reclaimed)
 {
 	/* Do not loop if specifically requested */
 	if (gfp_mask & __GFP_NORETRY)
 		return 0;
 
+	/* Always retry if specifically requested */
+	if (gfp_mask & __GFP_NOFAIL)
+		return 1;
+
+	/*
+	 * Suspend converts GFP_KERNEL to __GFP_WAIT which can prevent reclaim
+	 * making forward progress without invoking OOM. Suspend also disables
+	 * storage devices so kswapd will not help. Bail if we are suspending.
+	 */
+	if (!did_some_progress && pm_suspended_storage())
+		return 0;
+
 	/*
 	 * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER
 	 * means __GFP_NOFAIL, but that may not be true in other
@@ -1810,13 +1830,6 @@ should_alloc_retry(gfp_t gfp_mask, unsigned int order,
 	if (gfp_mask & __GFP_REPEAT && pages_reclaimed < (1 << order))
 		return 1;
 
-	/*
-	 * Don't let big-order allocations loop unless the caller
-	 * explicitly requests that.
-	 */
-	if (gfp_mask & __GFP_NOFAIL)
-		return 1;
-
 	return 0;
 }
 
@@ -2209,7 +2222,8 @@ rebalance:
 
 	/* Check if we should retry the allocation */
 	pages_reclaimed += did_some_progress;
-	if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
+	if (should_alloc_retry(gfp_mask, order, did_some_progress,
+						pages_reclaimed)) {
 		/* Wait for some write requests to complete then retry */
 		wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
 		goto rebalance;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b1cd12060723..9520592d4231 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -667,10 +667,10 @@ int try_to_free_swap(struct page *page)
 	 * original page might be freed under memory pressure, then
 	 * later read back in from swap, now with the wrong data.
 	 *
-	 * Hibernation clears bits from gfp_allowed_mask to prevent
-	 * memory reclaim from writing to disk, so check that here.
+	 * Hibernation suspends storage while it is writing the image
+	 * to disk so check that here.
 	 */
-	if (!(gfp_allowed_mask & __GFP_IO))
+	if (pm_suspended_storage())
 		return 0;
 
 	delete_from_swap_cache(page);
-- 
cgit v1.2.3


From 1399ff86f2a2bbacbbe68fa00c5f8c752b344723 Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 10 Jan 2012 15:07:25 -0800
Subject: kernel.h: add BUILD_BUG() macro

We can place this in definitions that we expect the compiler to remove by
dead code elimination.  If this assertion fails, we get a nice error
message at build time.

The GCC function attribute error("message") was added in version 4.3, so
we define a new macro __linktime_error(message) to expand to this for
GCC-4.3 and later.  This will give us an error diagnostic from the
compiler on the line that fails.  For other compilers
__linktime_error(message) expands to nothing, and we have to be content
with a link time error, but at least we will still get a build error.

BUILD_BUG() expands to the undefined function __build_bug_failed() and
will fail at link time if the compiler ever emits code for it.  On GCC-4.3
and later, attribute((error())) is used so that the failure will be noted
at compile time instead.

Signed-off-by: David Daney <david.daney@cavium.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: DM <dm.n9107@gmail.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc4.h |  1 +
 include/linux/compiler.h      |  4 +++-
 include/linux/kernel.h        | 16 ++++++++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index dfadc96e9d63..2f4079175afb 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -29,6 +29,7 @@
    the kernel context */
 #define __cold			__attribute__((__cold__))
 
+#define __linktime_error(message) __attribute__((__error__(message)))
 
 #if __GNUC_MINOR__ >= 5
 /*
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 320d6c94ff84..4a243546d142 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -293,7 +293,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #ifndef __compiletime_error
 # define __compiletime_error(message)
 #endif
-
+#ifndef __linktime_error
+# define __linktime_error(message)
+#endif
 /*
  * Prevent the compiler from merging or refetching accesses.  The compiler
  * is also forbidden from reordering successive instances of ACCESS_ONCE(),
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e8b1597b5cf2..f48e8a528544 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -665,6 +665,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 #define BUILD_BUG_ON_ZERO(e) (0)
 #define BUILD_BUG_ON_NULL(e) ((void*)0)
 #define BUILD_BUG_ON(condition)
+#define BUILD_BUG() (0)
 #else /* __CHECKER__ */
 
 /* Force a compilation error if a constant expression is not a power of 2 */
@@ -703,6 +704,21 @@ extern int __build_bug_on_failed;
 		if (condition) __build_bug_on_failed = 1;	\
 	} while(0)
 #endif
+
+/**
+ * BUILD_BUG - break compile if used.
+ *
+ * If you have some code that you expect the compiler to eliminate at
+ * build time, you should use BUILD_BUG to detect if it is
+ * unexpectedly used.
+ */
+#define BUILD_BUG()						\
+	do {							\
+		extern void __build_bug_failed(void)		\
+			__linktime_error("BUILD_BUG failed");	\
+		__build_bug_failed();				\
+	} while (0)
+
 #endif	/* __CHECKER__ */
 
 /* Trap pasters of __FUNCTION__ at compile-time */
-- 
cgit v1.2.3


From c0a32fc5a2e470d0b02597b23ad79a317735253e Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Tue, 10 Jan 2012 15:07:28 -0800
Subject: mm: more intensive memory corruption debugging

With CONFIG_DEBUG_PAGEALLOC configured, the CPU will generate an exception
on access (read,write) to an unallocated page, which permits us to catch
code which corrupts memory.  However the kernel is trying to maximise
memory usage, hence there are usually few free pages in the system and
buggy code usually corrupts some crucial data.

This patch changes the buddy allocator to keep more free/protected pages
and to interlace free/protected and allocated pages to increase the
probability of catching corruption.

When the kernel is compiled with CONFIG_DEBUG_PAGEALLOC,
debug_guardpage_minorder defines the minimum order used by the page
allocator to grant a request.  The requested size will be returned with
the remaining pages used as guard pages.

The default value of debug_guardpage_minorder is zero: no change from
current behaviour.

[akpm@linux-foundation.org: tweak documentation, s/flg/flag/]
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt | 19 ++++++++++
 include/linux/mm.h                  | 17 +++++++++
 include/linux/page-debug-flags.h    |  4 +-
 mm/Kconfig.debug                    |  5 +++
 mm/page_alloc.c                     | 75 ++++++++++++++++++++++++++++++++++---
 5 files changed, 113 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 7b2e5c5eefa6..7ed7030e7722 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -623,6 +623,25 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	no_debug_objects
 			[KNL] Disable object debugging
 
+	debug_guardpage_minorder=
+			[KNL] When CONFIG_DEBUG_PAGEALLOC is set, this
+			parameter allows control of the order of pages that will
+			be intentionally kept free (and hence protected) by the
+			buddy allocator. Bigger values increase the probability
+			of catching random memory corruption, but reduce the
+			amount of memory for normal system use. The maximum
+			possible value is MAX_ORDER/2.  Setting this parameter
+			to 1 or 2 should be enough to identify most random
+			memory corruption problems caused by bugs in kernel or
+			driver code when a CPU writes to (or reads from) a
+			random memory location. Note that there exists a class
+			of memory corruption problems caused by buggy H/W or
+			F/W or by drivers badly programming DMA (basically when
+			memory is written at bus level and the CPU MMU is
+			bypassed) which are not detectable by
+			CONFIG_DEBUG_PAGEALLOC, hence this option will not help
+			tracking down these problems.
+
 	debugpat	[X86] Enable PAT debugging
 
 	decnet.addr=	[HW,NET]
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5d9b4c9813bd..5568553a41fd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1618,5 +1618,22 @@ extern void copy_user_huge_page(struct page *dst, struct page *src,
 				unsigned int pages_per_huge_page);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+extern unsigned int _debug_guardpage_minorder;
+
+static inline unsigned int debug_guardpage_minorder(void)
+{
+	return _debug_guardpage_minorder;
+}
+
+static inline bool page_is_guard(struct page *page)
+{
+	return test_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
+}
+#else
+static inline unsigned int debug_guardpage_minorder(void) { return 0; }
+static inline bool page_is_guard(struct page *page) { return false; }
+#endif /* CONFIG_DEBUG_PAGEALLOC */
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/page-debug-flags.h b/include/linux/page-debug-flags.h
index b0638fd91e92..22691f614043 100644
--- a/include/linux/page-debug-flags.h
+++ b/include/linux/page-debug-flags.h
@@ -13,6 +13,7 @@
 
 enum page_debug_flags {
 	PAGE_DEBUG_FLAG_POISON,		/* Page is poisoned */
+	PAGE_DEBUG_FLAG_GUARD,
 };
 
 /*
@@ -21,7 +22,8 @@ enum page_debug_flags {
  */
 
 #ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
-#if !defined(CONFIG_PAGE_POISONING) \
+#if !defined(CONFIG_PAGE_POISONING) && \
+    !defined(CONFIG_PAGE_GUARD) \
 /* && !defined(CONFIG_PAGE_DEBUG_SOMETHING_ELSE) && ... */
 #error WANT_PAGE_DEBUG_FLAGS is turned on with no debug features!
 #endif
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 8b1a477162dc..4b2443254de2 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -4,6 +4,7 @@ config DEBUG_PAGEALLOC
 	depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC
 	depends on !KMEMCHECK
 	select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	select PAGE_GUARD if ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	---help---
 	  Unmap pages from the kernel linear mapping after free_pages().
 	  This results in a large slowdown, but helps to find certain types
@@ -22,3 +23,7 @@ config WANT_PAGE_DEBUG_FLAGS
 config PAGE_POISONING
 	bool
 	select WANT_PAGE_DEBUG_FLAGS
+
+config PAGE_GUARD
+	bool
+	select WANT_PAGE_DEBUG_FLAGS
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3cba4b67203f..93baebcc06f3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -57,6 +57,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
+#include <linux/page-debug-flags.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -388,6 +389,37 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
 		clear_highpage(page + i);
 }
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+unsigned int _debug_guardpage_minorder;
+
+static int __init debug_guardpage_minorder_setup(char *buf)
+{
+	unsigned long res;
+
+	if (kstrtoul(buf, 10, &res) < 0 ||  res > MAX_ORDER / 2) {
+		printk(KERN_ERR "Bad debug_guardpage_minorder value\n");
+		return 0;
+	}
+	_debug_guardpage_minorder = res;
+	printk(KERN_INFO "Setting debug_guardpage_minorder to %lu\n", res);
+	return 0;
+}
+__setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
+
+static inline void set_page_guard_flag(struct page *page)
+{
+	__set_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
+}
+
+static inline void clear_page_guard_flag(struct page *page)
+{
+	__clear_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
+}
+#else
+static inline void set_page_guard_flag(struct page *page) { }
+static inline void clear_page_guard_flag(struct page *page) { }
+#endif
+
 static inline void set_page_order(struct page *page, int order)
 {
 	set_page_private(page, order);
@@ -445,6 +477,11 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
 	if (page_zone_id(page) != page_zone_id(buddy))
 		return 0;
 
+	if (page_is_guard(buddy) && page_order(buddy) == order) {
+		VM_BUG_ON(page_count(buddy) != 0);
+		return 1;
+	}
+
 	if (PageBuddy(buddy) && page_order(buddy) == order) {
 		VM_BUG_ON(page_count(buddy) != 0);
 		return 1;
@@ -501,11 +538,19 @@ static inline void __free_one_page(struct page *page,
 		buddy = page + (buddy_idx - page_idx);
 		if (!page_is_buddy(page, buddy, order))
 			break;
-
-		/* Our buddy is free, merge with it and move up one order. */
-		list_del(&buddy->lru);
-		zone->free_area[order].nr_free--;
-		rmv_page_order(buddy);
+		/*
+		 * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
+		 * merge with it and move up one order.
+		 */
+		if (page_is_guard(buddy)) {
+			clear_page_guard_flag(buddy);
+			set_page_private(page, 0);
+			__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
+		} else {
+			list_del(&buddy->lru);
+			zone->free_area[order].nr_free--;
+			rmv_page_order(buddy);
+		}
 		combined_idx = buddy_idx & page_idx;
 		page = page + (combined_idx - page_idx);
 		page_idx = combined_idx;
@@ -731,6 +776,23 @@ static inline void expand(struct zone *zone, struct page *page,
 		high--;
 		size >>= 1;
 		VM_BUG_ON(bad_range(zone, &page[size]));
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+		if (high < debug_guardpage_minorder()) {
+			/*
+			 * Mark as guard pages (or page), that will allow to
+			 * merge back to allocator when buddy will be freed.
+			 * Corresponding page table entries will not be touched,
+			 * pages will stay not present in virtual address space
+			 */
+			INIT_LIST_HEAD(&page[size].lru);
+			set_page_guard_flag(&page[size]);
+			set_page_private(&page[size], high);
+			/* Guard pages are not available for any usage */
+			__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << high));
+			continue;
+		}
+#endif
 		list_add(&page[size].lru, &area->free_list[migratetype]);
 		area->nr_free++;
 		set_page_order(&page[size], high);
@@ -1754,7 +1816,8 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 {
 	unsigned int filter = SHOW_MEM_FILTER_NODES;
 
-	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
+	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
+	    debug_guardpage_minorder() > 0)
 		return;
 
 	/*
-- 
cgit v1.2.3


From f6d7e0cb3ecc248e98fa11d83253f6174bd7e085 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 10 Jan 2012 15:07:38 -0800
Subject: mm, debug: test for online nid when allocating on single node

Calling alloc_pages_exact_node() means the allocation only passes the
zonelist of a single node into the page allocator.  If that node isn't
online, its zonelist may never have been initialized, causing a strange
oops that may not immediately be clear.

I recently debugged an issue where node 0 wasn't online and an allocator
was passing 0 to alloc_pages_exact_node() and it resulted in a NULL
pointer on zonelist->_zoneref.  If CONFIG_DEBUG_VM is enabled, though, it
would be nice to catch this a bit earlier.

Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 91812df1351a..66f172fdf5fe 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -313,7 +313,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 static inline struct page *alloc_pages_exact_node(int nid, gfp_t gfp_mask,
 						unsigned int order)
 {
-	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
+	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid));
 
 	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
 }
-- 
cgit v1.2.3


From ab8fabd46f811d5153d8a0cd2fac9a0d41fb593d Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Tue, 10 Jan 2012 15:07:42 -0800
Subject: mm: exclude reserved pages from dirtyable memory

Per-zone dirty limits try to distribute page cache pages allocated for
writing across zones in proportion to the individual zone sizes, to reduce
the likelihood of reclaim having to write back individual pages from the
LRU lists in order to make progress.

This patch:

The amount of dirtyable pages should not include the full number of free
pages: there is a number of reserved pages that the page allocator and
kswapd always try to keep free.

The closer (reclaimable pages - dirty pages) is to the number of reserved
pages, the more likely it becomes for reclaim to run into dirty pages:

       +----------+ ---
       |   anon   |  |
       +----------+  |
       |          |  |
       |          |  -- dirty limit new    -- flusher new
       |   file   |  |                     |
       |          |  |                     |
       |          |  -- dirty limit old    -- flusher old
       |          |                        |
       +----------+                       --- reclaim
       | reserved |
       +----------+
       |  kernel  |
       +----------+

This patch introduces a per-zone dirty reserve that takes both the lowmem
reserve as well as the high watermark of the zone into account, and a
global sum of those per-zone values that is subtracted from the global
amount of dirtyable pages.  The lowmem reserve is unavailable to page
cache allocations and kswapd tries to keep the high watermark free.  We
don't want to end up in a situation where reclaim has to clean pages in
order to balance zones.

Not treating reserved pages as dirtyable on a global level is only a
conceptual fix.  In reality, dirty pages are not distributed equally
across zones and reclaim runs into dirty pages on a regular basis.

But it is important to get this right before tackling the problem on a
per-zone level, where the distance between reclaim and the dirty pages is
mostly much smaller in absolute numbers.

[akpm@linux-foundation.org: fix highmem build]
Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Chris Mason <chris.mason@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  6 ++++++
 include/linux/swap.h   |  1 +
 mm/page-writeback.c    |  5 +++--
 mm/page_alloc.c        | 19 +++++++++++++++++++
 4 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3ac040f19369..ca6ca92418a6 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -317,6 +317,12 @@ struct zone {
 	 */
 	unsigned long		lowmem_reserve[MAX_NR_ZONES];
 
+	/*
+	 * This is a per-zone reserve of pages that should not be
+	 * considered dirtyable memory.
+	 */
+	unsigned long		dirty_balance_reserve;
+
 #ifdef CONFIG_NUMA
 	int node;
 	/*
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1e22e126d2ac..06061a7f8e69 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -207,6 +207,7 @@ struct swap_list_t {
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
+extern unsigned long dirty_balance_reserve;
 extern unsigned int nr_free_buffer_pages(void);
 extern unsigned int nr_free_pagecache_pages(void);
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c081bf62202b..9ab6de82d8e6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -157,7 +157,7 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
 			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
 
 		x += zone_page_state(z, NR_FREE_PAGES) +
-		     zone_reclaimable_pages(z);
+		     zone_reclaimable_pages(z) - z->dirty_balance_reserve;
 	}
 	/*
 	 * Make sure that the number of highmem pages is never larger
@@ -181,7 +181,8 @@ static unsigned long determine_dirtyable_memory(void)
 {
 	unsigned long x;
 
-	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
+	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages() -
+	    dirty_balance_reserve;
 
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 93baebcc06f3..2cb9eb71e282 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -97,6 +97,14 @@ EXPORT_SYMBOL(node_states);
 
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
+/*
+ * When calculating the number of globally allowed dirty pages, there
+ * is a certain number of per-zone reserves that should not be
+ * considered dirtyable memory.  This is the sum of those reserves
+ * over all existing zones that contribute dirtyable memory.
+ */
+unsigned long dirty_balance_reserve __read_mostly;
+
 int percpu_pagelist_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 
@@ -4822,8 +4830,19 @@ static void calculate_totalreserve_pages(void)
 			if (max > zone->present_pages)
 				max = zone->present_pages;
 			reserve_pages += max;
+			/*
+			 * Lowmem reserves are not available to
+			 * GFP_HIGHUSER page cache allocations and
+			 * kswapd tries to balance zones to their high
+			 * watermark.  As a result, neither should be
+			 * regarded as dirtyable memory, to prevent a
+			 * situation where reclaim has to clean pages
+			 * in order to balance the zones.
+			 */
+			zone->dirty_balance_reserve = max;
 		}
 	}
+	dirty_balance_reserve = reserve_pages;
 	totalreserve_pages = reserve_pages;
 }
 
-- 
cgit v1.2.3


From a756cf5908530e8b40bdf569eb48b40139e8d7fd Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Tue, 10 Jan 2012 15:07:49 -0800
Subject: mm: try to distribute dirty pages fairly across zones

The maximum number of dirty pages that exist in the system at any time is
determined by a number of pages considered dirtyable and a user-configured
percentage of those, or an absolute number in bytes.

This number of dirtyable pages is the sum of memory provided by all the
zones in the system minus their lowmem reserves and high watermarks, so
that the system can retain a healthy number of free pages without having
to reclaim dirty pages.

But there is a flaw in that we have a zoned page allocator which does not
care about the global state but rather the state of individual memory
zones.  And right now there is nothing that prevents one zone from filling
up with dirty pages while other zones are spared, which frequently leads
to situations where kswapd, in order to restore the watermark of free
pages, does indeed have to write pages from that zone's LRU list.  This
can interfere so badly with IO from the flusher threads that major
filesystems (btrfs, xfs, ext4) mostly ignore write requests from reclaim
already, taking away the VM's only possibility to keep such a zone
balanced, aside from hoping the flushers will soon clean pages from that
zone.

Enter per-zone dirty limits.  They are to a zone's dirtyable memory what
the global limit is to the global amount of dirtyable memory, and try to
make sure that no single zone receives more than its fair share of the
globally allowed dirty pages in the first place.  As the number of pages
considered dirtyable excludes the zones' lowmem reserves and high
watermarks, the maximum number of dirty pages in a zone is such that the
zone can always be balanced without requiring page cleaning.

As this is a placement decision in the page allocator and pages are
dirtied only after the allocation, this patch allows allocators to pass
__GFP_WRITE when they know in advance that the page will be written to and
become dirty soon.  The page allocator will then attempt to allocate from
the first zone of the zonelist - which on NUMA is determined by the task's
NUMA memory policy - that has not exceeded its dirty limit.

At first glance, it would appear that the diversion to lower zones can
increase pressure on them, but this is not the case.  With a full high
zone, allocations will be diverted to lower zones eventually, so it is
more of a shift in timing of the lower zone allocations.  Workloads that
previously could fit their dirty pages completely in the higher zone may
be forced to allocate from lower zones, but the amount of pages that
"spill over" are limited themselves by the lower zones' dirty constraints,
and thus unlikely to become a problem.

For now, the problem of unfair dirty page distribution remains for NUMA
configurations where the zones allowed for allocation are in sum not big
enough to trigger the global dirty limits, wake up the flusher threads and
remedy the situation.  Because of this, an allocation that could not
succeed on any of the considered zones is allowed to ignore the dirty
limits before going into direct reclaim or even failing the allocation,
until a future patch changes the global dirty throttling and flusher
thread activation so that they take individual zone states into account.

			Test results

15M DMA + 3246M DMA32 + 504M Normal = 3765M memory
40% dirty ratio
16G USB thumb drive
10 runs of dd if=/dev/zero of=disk/zeroes bs=32k count=$((10 << 15))

		seconds			nr_vmscan_write
		        (stddev)	       min|     median|        max
xfs
vanilla:	 549.747( 3.492)	     0.000|      0.000|      0.000
patched:	 550.996( 3.802)	     0.000|      0.000|      0.000

fuse-ntfs
vanilla:	1183.094(53.178)	 54349.000|  59341.000|  65163.000
patched:	 558.049(17.914)	     0.000|      0.000|     43.000

btrfs
vanilla:	 573.679(14.015)	156657.000| 460178.000| 606926.000
patched:	 563.365(11.368)	     0.000|      0.000|   1362.000

ext4
vanilla:	 561.197(15.782)	     0.000|2725438.000|4143837.000
patched:	 568.806(17.496)	     0.000|      0.000|      0.000

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Tested-by: Wu Fengguang <fengguang.wu@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Chris Mason <chris.mason@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h       |  4 ++-
 include/linux/writeback.h |  1 +
 mm/page-writeback.c       | 82 +++++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c           | 29 +++++++++++++++++
 4 files changed, 115 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 66f172fdf5fe..581e74b7df95 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -36,6 +36,7 @@ struct vm_area_struct;
 #endif
 #define ___GFP_NO_KSWAPD	0x400000u
 #define ___GFP_OTHER_NODE	0x800000u
+#define ___GFP_WRITE		0x1000000u
 
 /*
  * GFP bitmasks..
@@ -85,6 +86,7 @@ struct vm_area_struct;
 
 #define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
+#define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
 
 /*
  * This may seem redundant, but it's a way of annotating false positives vs.
@@ -92,7 +94,7 @@ struct vm_area_struct;
  */
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
 
-#define __GFP_BITS_SHIFT 24	/* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 25	/* Room for N __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* This equals 0, but use constants in case they ever change */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 34a005515fef..6dff47304971 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -124,6 +124,7 @@ void laptop_mode_timer_fn(unsigned long data);
 static inline void laptop_sync_completion(void) { }
 #endif
 void throttle_vm_writeout(gfp_t gfp_mask);
+bool zone_dirty_ok(struct zone *zone);
 
 extern unsigned long global_dirty_limit;
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 433fa990fe8b..5cdd4f2b0c9d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -147,6 +147,24 @@ static struct prop_descriptor vm_completions;
  * clamping level.
  */
 
+/*
+ * In a memory zone, there is a certain amount of pages we consider
+ * available for the page cache, which is essentially the number of
+ * free and reclaimable pages, minus some zone reserves to protect
+ * lowmem and the ability to uphold the zone's watermarks without
+ * requiring writeback.
+ *
+ * This number of dirtyable pages is the base value of which the
+ * user-configurable dirty ratio is the effective number of pages that
+ * are allowed to be actually dirtied.  Per individual zone, or
+ * globally by using the sum of dirtyable pages over all zones.
+ *
+ * Because the user is allowed to specify the dirty limit globally as
+ * absolute number of bytes, calculating the per-zone dirty limit can
+ * require translating the configured limit into a percentage of
+ * global dirtyable memory first.
+ */
+
 static unsigned long highmem_dirtyable_memory(unsigned long total)
 {
 #ifdef CONFIG_HIGHMEM
@@ -232,6 +250,70 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
 	trace_global_dirty_state(background, dirty);
 }
 
+/**
+ * zone_dirtyable_memory - number of dirtyable pages in a zone
+ * @zone: the zone
+ *
+ * Returns the zone's number of pages potentially available for dirty
+ * page cache.  This is the base value for the per-zone dirty limits.
+ */
+static unsigned long zone_dirtyable_memory(struct zone *zone)
+{
+	/*
+	 * The effective global number of dirtyable pages may exclude
+	 * highmem as a big-picture measure to keep the ratio between
+	 * dirty memory and lowmem reasonable.
+	 *
+	 * But this function is purely about the individual zone and a
+	 * highmem zone can hold its share of dirty pages, so we don't
+	 * care about vm_highmem_is_dirtyable here.
+	 */
+	return zone_page_state(zone, NR_FREE_PAGES) +
+	       zone_reclaimable_pages(zone) -
+	       zone->dirty_balance_reserve;
+}
+
+/**
+ * zone_dirty_limit - maximum number of dirty pages allowed in a zone
+ * @zone: the zone
+ *
+ * Returns the maximum number of dirty pages allowed in a zone, based
+ * on the zone's dirtyable memory.
+ */
+static unsigned long zone_dirty_limit(struct zone *zone)
+{
+	unsigned long zone_memory = zone_dirtyable_memory(zone);
+	struct task_struct *tsk = current;
+	unsigned long dirty;
+
+	if (vm_dirty_bytes)
+		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
+			zone_memory / global_dirtyable_memory();
+	else
+		dirty = vm_dirty_ratio * zone_memory / 100;
+
+	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
+		dirty += dirty / 4;
+
+	return dirty;
+}
+
+/**
+ * zone_dirty_ok - tells whether a zone is within its dirty limits
+ * @zone: the zone to check
+ *
+ * Returns %true when the dirty pages in @zone are within the zone's
+ * dirty limit, %false if the limit is exceeded.
+ */
+bool zone_dirty_ok(struct zone *zone)
+{
+	unsigned long limit = zone_dirty_limit(zone);
+
+	return zone_page_state(zone, NR_FILE_DIRTY) +
+	       zone_page_state(zone, NR_UNSTABLE_NFS) +
+	       zone_page_state(zone, NR_WRITEBACK) <= limit;
+}
+
 /*
  * couple the period to the dirty_ratio:
  *
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2cb9eb71e282..4f95bcf0f2b1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1735,6 +1735,35 @@ zonelist_scan:
 		if ((alloc_flags & ALLOC_CPUSET) &&
 			!cpuset_zone_allowed_softwall(zone, gfp_mask))
 				continue;
+		/*
+		 * When allocating a page cache page for writing, we
+		 * want to get it from a zone that is within its dirty
+		 * limit, such that no single zone holds more than its
+		 * proportional share of globally allowed dirty pages.
+		 * The dirty limits take into account the zone's
+		 * lowmem reserves and high watermark so that kswapd
+		 * should be able to balance it without having to
+		 * write pages from its LRU list.
+		 *
+		 * This may look like it could increase pressure on
+		 * lower zones by failing allocations in higher zones
+		 * before they are full.  But the pages that do spill
+		 * over are limited as the lower zones are protected
+		 * by this very same mechanism.  It should not become
+		 * a practical burden to them.
+		 *
+		 * XXX: For now, allow allocations to potentially
+		 * exceed the per-zone dirty limit in the slowpath
+		 * (ALLOC_WMARK_LOW unset) before going into reclaim,
+		 * which is important when on a NUMA setup the allowed
+		 * zones are together not big enough to reach the
+		 * global limit.  The proper fix for these situations
+		 * will require awareness of zones in the
+		 * dirty-throttling and the flusher threads.
+		 */
+		if ((alloc_flags & ALLOC_WMARK_LOW) &&
+		    (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone))
+			goto this_zone_full;
 
 		BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
 		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
-- 
cgit v1.2.3


From 948f017b093a9baac23855fcd920d3a970b71bb6 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Tue, 10 Jan 2012 15:08:05 -0800
Subject: mremap: enforce rmap src/dst vma ordering in case of vma_merge()
 succeeding in copy_vma()

migrate was doing an rmap_walk with speculative lock-less access on
pagetables.  That could lead it to not serializing properly against mremap
PT locks.  But a second problem remains in the order of vmas in the
same_anon_vma list used by the rmap_walk.

If vma_merge succeeds in copy_vma, the src vma could be placed after the
dst vma in the same_anon_vma list.  That could still lead to migrate
missing some pte.

This patch adds an anon_vma_moveto_tail() function to force the dst vma at
the end of the list before mremap starts to solve the problem.

If the mremap is very large and there are lots of parents or children
sharing the anon_vma root lock, this should still scale better than taking
the anon_vma root lock around every pte copy practically for the whole
duration of mremap.

Update: Hugh noticed special care is needed in the error path where
move_page_tables goes in the reverse direction, a second
anon_vma_moveto_tail() call is needed in the error path.

This program exercises the anon_vma_moveto_tail:

===

int main()
{
	static struct timeval oldstamp, newstamp;
	long diffsec;
	char *p, *p2, *p3, *p4;
	if (posix_memalign((void **)&p, 2*1024*1024, SIZE))
		perror("memalign"), exit(1);
	if (posix_memalign((void **)&p2, 2*1024*1024, SIZE))
		perror("memalign"), exit(1);
	if (posix_memalign((void **)&p3, 2*1024*1024, SIZE))
		perror("memalign"), exit(1);

	memset(p, 0xff, SIZE);
	printf("%p\n", p);
	memset(p2, 0xff, SIZE);
	memset(p3, 0x77, 4096);
	if (memcmp(p, p2, SIZE))
		printf("error\n");
	p4 = mremap(p+SIZE/2, SIZE/2, SIZE/2, MREMAP_FIXED|MREMAP_MAYMOVE, p3);
	if (p4 != p3)
		perror("mremap"), exit(1);
	p4 = mremap(p4, SIZE/2, SIZE/2, MREMAP_FIXED|MREMAP_MAYMOVE, p+SIZE/2);
	if (p4 != p+SIZE/2)
		perror("mremap"), exit(1);
	if (memcmp(p, p2, SIZE))
		printf("error\n");
	printf("ok\n");

	return 0;
}
===

$ perf probe -a anon_vma_moveto_tail
Add new event:
  probe:anon_vma_moveto_tail (on anon_vma_moveto_tail)

You can now use it on all perf tools, such as:

        perf record -e probe:anon_vma_moveto_tail -aR sleep 1

$ perf record -e probe:anon_vma_moveto_tail -aR ./anon_vma_moveto_tail
0x7f2ca2800000
ok
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.043 MB perf.data (~1860 samples) ]
$ perf report --stdio
   100.00%  anon_vma_moveto  [kernel.kallsyms]  [k] anon_vma_moveto_tail

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reported-by: Nai Xia <nai.xia@gmail.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Pawel Sikora <pluto@agmk.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h |  1 +
 mm/mmap.c            | 24 +++++++++++++++++++++---
 mm/mremap.c          |  9 +++++++++
 mm/rmap.c            | 45 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 2148b122779b..1afb9954bbf1 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -120,6 +120,7 @@ void anon_vma_init(void);	/* create anon_vma_cachep */
 int  anon_vma_prepare(struct vm_area_struct *);
 void unlink_anon_vmas(struct vm_area_struct *);
 int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
+void anon_vma_moveto_tail(struct vm_area_struct *);
 int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
 void __anon_vma_link(struct vm_area_struct *);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index eae90af60ea6..adea3b8880e3 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2322,13 +2322,16 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	struct vm_area_struct *new_vma, *prev;
 	struct rb_node **rb_link, *rb_parent;
 	struct mempolicy *pol;
+	bool faulted_in_anon_vma = true;
 
 	/*
 	 * If anonymous vma has not yet been faulted, update new pgoff
 	 * to match new location, to increase its chance of merging.
 	 */
-	if (!vma->vm_file && !vma->anon_vma)
+	if (unlikely(!vma->vm_file && !vma->anon_vma)) {
 		pgoff = addr >> PAGE_SHIFT;
+		faulted_in_anon_vma = false;
+	}
 
 	find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
 	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
@@ -2337,9 +2340,24 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		/*
 		 * Source vma may have been merged into new_vma
 		 */
-		if (vma_start >= new_vma->vm_start &&
-		    vma_start < new_vma->vm_end)
+		if (unlikely(vma_start >= new_vma->vm_start &&
+			     vma_start < new_vma->vm_end)) {
+			/*
+			 * The only way we can get a vma_merge with
+			 * self during an mremap is if the vma hasn't
+			 * been faulted in yet and we were allowed to
+			 * reset the dst vma->vm_pgoff to the
+			 * destination address of the mremap to allow
+			 * the merge to happen. mremap must change the
+			 * vm_pgoff linearity between src and dst vmas
+			 * (in turn preventing a vma_merge) to be
+			 * safe. It is only safe to keep the vm_pgoff
+			 * linear if there are no pages mapped yet.
+			 */
+			VM_BUG_ON(faulted_in_anon_vma);
 			*vmap = new_vma;
+		} else
+			anon_vma_moveto_tail(new_vma);
 	} else {
 		new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 		if (new_vma) {
diff --git a/mm/mremap.c b/mm/mremap.c
index d6959cb4df58..87bb8393e7d2 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -220,6 +220,15 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 
 	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
 	if (moved_len < old_len) {
+		/*
+		 * Before moving the page tables from the new vma to
+		 * the old vma, we need to be sure the old vma is
+		 * queued after new vma in the same_anon_vma list to
+		 * prevent SMP races with rmap_walk (that could lead
+		 * rmap_walk to miss some page table).
+		 */
+		anon_vma_moveto_tail(vma);
+
 		/*
 		 * On error, move entries back from new area to old,
 		 * which will succeed since page tables still there,
diff --git a/mm/rmap.c b/mm/rmap.c
index a4fd3680038b..a2e5ce1fa081 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -271,6 +271,51 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 	return -ENOMEM;
 }
 
+/*
+ * Some rmap walks need to find all ptes/hugepmds without false
+ * negatives (like migrate and split_huge_page) while running concurrently
+ * with operations that copy or move pagetables (like mremap() and
+ * fork()). To be safe, they depend on the anon_vma "same_anon_vma"
+ * list to be in a certain order: the dst_vma must be placed after the
+ * src_vma in the list. This is always guaranteed by fork() but
+ * mremap() needs to call this function to enforce it in case the
+ * dst_vma isn't newly allocated and chained with the anon_vma_clone()
+ * function but just an extension of a pre-existing vma through
+ * vma_merge.
+ *
+ * NOTE: the same_anon_vma list can still be changed by other
+ * processes while mremap runs because mremap doesn't hold the
+ * anon_vma mutex to prevent modifications to the list while it
+ * runs. All we need to enforce is that the relative order of this
+ * process vmas isn't changing (we don't care about other vmas
+ * order). Each vma corresponds to an anon_vma_chain structure so
+ * there's no risk that other processes calling anon_vma_moveto_tail()
+ * and changing the same_anon_vma list under mremap() will screw with
+ * the relative order of this process vmas in the list, because
+ * they can't alter the order of any vma that belongs to this
+ * process. And there can't be another anon_vma_moveto_tail() running
+ * concurrently with mremap() coming from this process because we hold
+ * the mmap_sem for the whole mremap(). fork() ordering dependency
+ * also shouldn't be affected because fork() only cares that the
+ * parent vmas are placed in the list before the child vmas and
+ * anon_vma_moveto_tail() won't reorder vmas from either the fork()
+ * parent or child.
+ */
+void anon_vma_moveto_tail(struct vm_area_struct *dst)
+{
+	struct anon_vma_chain *pavc;
+	struct anon_vma *root = NULL;
+
+	list_for_each_entry_reverse(pavc, &dst->anon_vma_chain, same_vma) {
+		struct anon_vma *anon_vma = pavc->anon_vma;
+		VM_BUG_ON(pavc->vma != dst);
+		root = lock_anon_vma_root(root, anon_vma);
+		list_del(&pavc->same_anon_vma);
+		list_add_tail(&pavc->same_anon_vma, &anon_vma->head);
+	}
+	unlock_anon_vma_root(root);
+}
+
 /*
  * Attach vma to its own anon_vma, as well as to the anon_vmas that
  * the corresponding VMA in the parent process is attached to.
-- 
cgit v1.2.3


From fcfb4dcc9698f932836aa63ba0d82e7dbd300fb3 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Tue, 10 Jan 2012 15:08:21 -0800
Subject: mm/mempolicy.c: mpol_equal(): use bool

mpol_equal() logically returns a boolean.  Use a bool type to slightly
improve readability.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Stephen Wilson <wilsons@start.ca>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h | 10 +++++-----
 mm/mempolicy.c            | 14 +++++++-------
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 7978eec1b7d9..7c727a90d70d 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -164,11 +164,11 @@ static inline void mpol_get(struct mempolicy *pol)
 		atomic_inc(&pol->refcnt);
 }
 
-extern int __mpol_equal(struct mempolicy *a, struct mempolicy *b);
-static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
+extern bool __mpol_equal(struct mempolicy *a, struct mempolicy *b);
+static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
 	if (a == b)
-		return 1;
+		return true;
 	return __mpol_equal(a, b);
 }
 
@@ -257,9 +257,9 @@ static inline int vma_migratable(struct vm_area_struct *vma)
 
 struct mempolicy {};
 
-static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
+static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
-	return 1;
+	return true;
 }
 
 static inline void mpol_put(struct mempolicy *p)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c3fdbcb17658..e3d58f088466 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1983,28 +1983,28 @@ struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol,
 }
 
 /* Slow path of a mempolicy comparison */
-int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
+bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
 	if (!a || !b)
-		return 0;
+		return false;
 	if (a->mode != b->mode)
-		return 0;
+		return false;
 	if (a->flags != b->flags)
-		return 0;
+		return false;
 	if (mpol_store_user_nodemask(a))
 		if (!nodes_equal(a->w.user_nodemask, b->w.user_nodemask))
-			return 0;
+			return false;
 
 	switch (a->mode) {
 	case MPOL_BIND:
 		/* Fall through */
 	case MPOL_INTERLEAVE:
-		return nodes_equal(a->v.nodes, b->v.nodes);
+		return !!nodes_equal(a->v.nodes, b->v.nodes);
 	case MPOL_PREFERRED:
 		return a->v.preferred_node == b->v.preferred_node;
 	default:
 		BUG();
-		return 0;
+		return false;
 	}
 }
 
-- 
cgit v1.2.3


From a6d511e5155406cd214d3af3ff9cffc69548b006 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 10 Jan 2012 15:09:40 -0800
Subject: leds: add driver for TCA6507 LED controller

TI's TCA6507 is the LED driver in the GTA04 Openmoko motherboard.  The
driver provides full support for brightness levels and hardware blinking.

This driver can drive each of 7 outputs as an LED or a GPIO output,
and provides hardware-assist blinking.

[akpm@linux-foundation.org: fix __mod_i2c_device_table alias]
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: NeilBrown <neilb@suse.de>
Cc: Richard Purdie <rpurdie@rpsys.net>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/leds/Kconfig         |   8 +
 drivers/leds/Makefile        |   1 +
 drivers/leds/leds-tca6507.c  | 779 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/leds-tca6507.h |  34 ++
 4 files changed, 822 insertions(+)
 create mode 100644 drivers/leds/leds-tca6507.c
 create mode 100644 include/linux/leds-tca6507.h

(limited to 'include/linux')

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 1b75a56ebd08..897a77dfa9d7 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -388,6 +388,14 @@ config LEDS_RENESAS_TPU
 	  pin function. The latter to support brightness control.
 	  Brightness control is supported but hardware blinking is not.
 
+config LEDS_TCA6507
+	tristate "LED Support for TCA6507 I2C chip"
+	depends on LEDS_CLASS && I2C
+	help
+	  This option enables support for LEDs connected to TCA6507
+	  LED driver chips accessed via the I2C bus.
+	  Driver supports brightness control and hardware-assisted blinking.
+
 config LEDS_TRIGGERS
 	bool "LED Trigger support"
 	depends on LEDS_CLASS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index e4f6bf568880..5c9dc4b000d5 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_LEDS_GPIO)			+= leds-gpio.o
 obj-$(CONFIG_LEDS_LP3944)		+= leds-lp3944.o
 obj-$(CONFIG_LEDS_LP5521)		+= leds-lp5521.o
 obj-$(CONFIG_LEDS_LP5523)		+= leds-lp5523.o
+obj-$(CONFIG_LEDS_TCA6507)		+= leds-tca6507.o
 obj-$(CONFIG_LEDS_CLEVO_MAIL)		+= leds-clevo-mail.o
 obj-$(CONFIG_LEDS_HP6XX)		+= leds-hp6xx.o
 obj-$(CONFIG_LEDS_FSG)			+= leds-fsg.o
diff --git a/drivers/leds/leds-tca6507.c b/drivers/leds/leds-tca6507.c
new file mode 100644
index 000000000000..133f89fb7071
--- /dev/null
+++ b/drivers/leds/leds-tca6507.c
@@ -0,0 +1,779 @@
+/*
+ * leds-tca6507
+ *
+ * The TCA6507 is a programmable LED controller that can drive 7
+ * separate lines either by holding them low, or by pulsing them
+ * with modulated width.
+ * The modulation can be varied in a simple pattern to produce a blink or
+ * double-blink.
+ *
+ * This driver can configure each line either as a 'GPIO' which is out-only
+ * (no pull-up) or as an LED with variable brightness and hardware-assisted
+ * blinking.
+ *
+ * Apart from OFF and ON there are three programmable brightness levels which
+ * can be programmed from 0 to 15 and indicate how many 500usec intervals in
+ * each 8msec that the led is 'on'.  The levels are named MASTER, BANK0 and
+ * BANK1.
+ *
+ * There are two different blink rates that can be programmed, each with
+ * separate time for rise, on, fall, off and second-off.  Thus if 3 or more
+ * different non-trivial rates are required, software must be used for the extra
+ * rates. The two different blink rates must align with the two levels BANK0 and
+ * BANK1.
+ * This driver does not support double-blink so 'second-off' always matches
+ * 'off'.
+ *
+ * Only 16 different times can be programmed in a roughly logarithmic scale from
+ * 64ms to 16320ms.  To be precise the possible times are:
+ *    0, 64, 128, 192, 256, 384, 512, 768,
+ *    1024, 1536, 2048, 3072, 4096, 5760, 8128, 16320
+ *
+ * Times that cannot be closely matched with these must be
+ * handled in software.  This driver allows 12.5% error in matching.
+ *
+ * This driver does not allow rise/fall rates to be set explicitly.  When trying
+ * to match a given 'on' or 'off' period, an appropriate pair of 'change' and
+ * 'hold' times are chosen to get a close match.  If the target delay is even,
+ * the 'change' number will be the smaller; if odd, the 'hold' number will be
+ * the smaller.
+ *
+ * Choosing pairs of delays with 12.5% errors allows us to match delays in the
+ * ranges: 56-72, 112-144, 168-216, 224-27504, 28560-36720.
+ * 26% of the achievable sums can be matched by multiple pairings. For example
+ * 1536 == 1536+0, 1024+512, or 768+768.  This driver will always choose the
+ * pairing with the least maximum - 768+768 in this case.  Other pairings are
+ * not available.
+ *
+ * Access to the 3 levels and 2 blinks are on a first-come, first-served basis.
+ * Access can be shared by multiple leds if they have the same level and
+ * either same blink rates, or some don't blink.
+ * When a led changes, it relinquishes access and tries again, so it might
+ * lose access to hardware blink.
+ * If a blink engine cannot be allocated, software blink is used.
+ * If the desired brightness cannot be allocated, the closest available non-zero
+ * brightness is used.  As 'full' is always available, the worst case would be
+ * to have two different blink rates at '1', with Max at '2', then other leds
+ * will have to choose between '2' and '16'.  Hopefully this is not likely.
+ *
+ * Each bank (BANK0 and BANK1) has two usage counts - LEDs using the brightness
+ * and LEDs using the blink.  It can only be reprogrammed when the appropriate
+ * counter is zero.  The MASTER level has a single usage count.
+ *
+ * Each Led has programmable 'on' and 'off' time as milliseconds.  With each
+ * there is a flag saying if it was explicitly requested or defaulted.
+ * Similarly the banks know if each time was explicit or a default.  Defaults
+ * are permitted to be changed freely - they are not recognised when matching.
+ *
+ *
+ * An led-tca6507 device must be provided with platform data.  This data
+ * lists for each output: the name, default trigger, and whether the signal
+ * is being used as a GPiO rather than an led.  'struct led_platform_data'
+ * is used for this.  If 'name' is NULL, the output isn't used.  If 'flags'
+ * is TCA6507_MAKE_GPIO, the output is a GPO.
+ * The "struct led_platform_data" can be embedded in a
+ * "struct tca6507_platform_data" which adds a 'gpio_base' for the GPiOs,
+ * and a 'setup' callback which is called once the GPiOs are available.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/leds.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <linux/workqueue.h>
+#include <linux/leds-tca6507.h>
+
+/* LED select registers determine the source that drives LED outputs */
+#define TCA6507_LS_LED_OFF	0x0	/* Output HI-Z (off) */
+#define TCA6507_LS_LED_OFF1	0x1	/* Output HI-Z (off) - not used */
+#define TCA6507_LS_LED_PWM0	0x2	/* Output LOW with Bank0 rate */
+#define TCA6507_LS_LED_PWM1	0x3	/* Output LOW with Bank1 rate */
+#define TCA6507_LS_LED_ON	0x4	/* Output LOW (on) */
+#define TCA6507_LS_LED_MIR	0x5	/* Output LOW with Master Intensity */
+#define TCA6507_LS_BLINK0	0x6	/* Blink at Bank0 rate */
+#define TCA6507_LS_BLINK1	0x7	/* Blink at Bank1 rate */
+
+enum {
+	BANK0,
+	BANK1,
+	MASTER,
+};
+static int bank_source[3] = {
+	TCA6507_LS_LED_PWM0,
+	TCA6507_LS_LED_PWM1,
+	TCA6507_LS_LED_MIR,
+};
+static int blink_source[2] = {
+	TCA6507_LS_BLINK0,
+	TCA6507_LS_BLINK1,
+};
+
+/* PWM registers */
+#define	TCA6507_REG_CNT			11
+
+/*
+ * 0x00, 0x01, 0x02 encode the TCA6507_LS_* values, each output
+ * owns one bit in each register
+ */
+#define	TCA6507_FADE_ON			0x03
+#define	TCA6507_FULL_ON			0x04
+#define	TCA6507_FADE_OFF		0x05
+#define	TCA6507_FIRST_OFF		0x06
+#define	TCA6507_SECOND_OFF		0x07
+#define	TCA6507_MAX_INTENSITY		0x08
+#define	TCA6507_MASTER_INTENSITY	0x09
+#define	TCA6507_INITIALIZE		0x0A
+
+#define	INIT_CODE			0x8
+
+#define TIMECODES 16
+static int time_codes[TIMECODES] = {
+	0, 64, 128, 192, 256, 384, 512, 768,
+	1024, 1536, 2048, 3072, 4096, 5760, 8128, 16320
+};
+
+/* Convert an led.brightness level (0..255) to a TCA6507 level (0..15) */
+static inline int TO_LEVEL(int brightness)
+{
+	return brightness >> 4;
+}
+
+/* ...and convert back */
+static inline int TO_BRIGHT(int level)
+{
+	if (level)
+		return (level << 4) | 0xf;
+	return 0;
+}
+
+#define NUM_LEDS 7
+struct tca6507_chip {
+	int			reg_set;	/* One bit per register where
+						 * a '1' means the register
+						 * should be written */
+	u8			reg_file[TCA6507_REG_CNT];
+	/* Bank 2 is Master Intensity and doesn't use times */
+	struct bank {
+		int level;
+		int ontime, offtime;
+		int on_dflt, off_dflt;
+		int time_use, level_use;
+	} bank[3];
+	struct i2c_client	*client;
+	struct work_struct	work;
+	spinlock_t		lock;
+
+	struct tca6507_led {
+		struct tca6507_chip	*chip;
+		struct led_classdev	led_cdev;
+		int			num;
+		int			ontime, offtime;
+		int			on_dflt, off_dflt;
+		int			bank;	/* Bank used, or -1 */
+		int			blink;	/* Set if hardware-blinking */
+	} leds[NUM_LEDS];
+#ifdef CONFIG_GPIOLIB
+	struct gpio_chip		gpio;
+	const char			*gpio_name[NUM_LEDS];
+	int				gpio_map[NUM_LEDS];
+#endif
+};
+
+static const struct i2c_device_id tca6507_id[] = {
+	{ "tca6507" },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, tca6507_id);
+
+static int choose_times(int msec, int *c1p, int *c2p)
+{
+	/*
+	 * Choose two timecodes which add to 'msec' as near as possible.
+	 * The first returned is the 'on' or 'off' time.  The second is to be
+	 * used as a 'fade-on' or 'fade-off' time.  If 'msec' is even,
+	 * the first will not be smaller than the second.  If 'msec' is odd,
+	 * the first will not be larger than the second.
+	 * If we cannot get a sum within 1/8 of 'msec' fail with -EINVAL,
+	 * otherwise return the sum that was achieved, plus 1 if the first is
+	 * smaller.
+	 * If two possibilities are equally good (e.g. 512+0, 256+256), choose
+	 * the pair with the smaller maximum (256+256) so there is more
+	 * change-time visible (i.e. it is softer).
+	 */
+	int c1, c2;
+	int tmax = msec * 9 / 8;
+	int tmin = msec * 7 / 8;
+	int diff = 65536;
+
+	/* We start at '1' to ensure we never even think of choosing a
+	 * total time of '0'.
+	 */
+	for (c1 = 1; c1 < TIMECODES; c1++) {
+		int t = time_codes[c1];
+		if (t*2 < tmin)
+			continue;
+		if (t > tmax)
+			break;
+		for (c2 = 0; c2 <= c1; c2++) {
+			int tt = t + time_codes[c2];
+			int d;
+			if (tt < tmin)
+				continue;
+			if (tt > tmax)
+				break;
+			/* This works! */
+			d = abs(msec - tt);
+			if (d >= diff)
+				continue;
+			/* Best yet */
+			*c1p = c1;
+			*c2p = c2;
+			diff = d;
+			if (d == 0)
+				return msec;
+		}
+	}
+	if (diff < 65536) {
+		int actual;
+		if (msec & 1) {
+			c1 = *c2p;
+			*c2p = *c1p;
+			*c1p = c1;
+		}
+		actual = time_codes[*c1p] + time_codes[*c2p];
+		if (*c1p < *c2p)
+			return actual + 1;
+		else
+			return actual;
+	}
+	/* No close match */
+	return -EINVAL;
+}
+
+/*
+ * Update the register file with the appropriate 3-bit state for
+ * the given led.
+ */
+static void set_select(struct tca6507_chip *tca, int led, int val)
+{
+	int mask = (1 << led);
+	int bit;
+
+	for (bit = 0; bit < 3; bit++) {
+		int n = tca->reg_file[bit] & ~mask;
+		if (val & (1 << bit))
+			n |= mask;
+		if (tca->reg_file[bit] != n) {
+			tca->reg_file[bit] = n;
+			tca->reg_set |= (1 << bit);
+		}
+	}
+}
+
+/* Update the register file with the appropriate 4-bit code for
+ * one bank or other.  This can be used for timers, for levels, or
+ * for initialisation.
+ */
+static void set_code(struct tca6507_chip *tca, int reg, int bank, int new)
+{
+	int mask = 0xF;
+	int n;
+	if (bank) {
+		mask <<= 4;
+		new <<= 4;
+	}
+	n = tca->reg_file[reg] & ~mask;
+	n |= new;
+	if (tca->reg_file[reg] != n) {
+		tca->reg_file[reg] = n;
+		tca->reg_set |= 1 << reg;
+	}
+}
+
+/* Update brightness level. */
+static void set_level(struct tca6507_chip *tca, int bank, int level)
+{
+	switch (bank) {
+	case BANK0:
+	case BANK1:
+		set_code(tca, TCA6507_MAX_INTENSITY, bank, level);
+		break;
+	case MASTER:
+		set_code(tca, TCA6507_MASTER_INTENSITY, 0, level);
+		break;
+	}
+	tca->bank[bank].level = level;
+}
+
+/* Record all relevant time codes for a given bank */
+static void set_times(struct tca6507_chip *tca, int bank)
+{
+	int c1, c2;
+	int result;
+
+	result = choose_times(tca->bank[bank].ontime, &c1, &c2);
+	dev_dbg(&tca->client->dev,
+		"Chose on  times %d(%d) %d(%d) for %dms\n", c1, time_codes[c1],
+		c2, time_codes[c2], tca->bank[bank].ontime);
+	set_code(tca, TCA6507_FADE_ON, bank, c2);
+	set_code(tca, TCA6507_FULL_ON, bank, c1);
+	tca->bank[bank].ontime = result;
+
+	result = choose_times(tca->bank[bank].offtime, &c1, &c2);
+	dev_dbg(&tca->client->dev,
+		"Chose off times %d(%d) %d(%d) for %dms\n", c1, time_codes[c1],
+		c2, time_codes[c2], tca->bank[bank].offtime);
+	set_code(tca, TCA6507_FADE_OFF, bank, c2);
+	set_code(tca, TCA6507_FIRST_OFF, bank, c1);
+	set_code(tca, TCA6507_SECOND_OFF, bank, c1);
+	tca->bank[bank].offtime = result;
+
+	set_code(tca, TCA6507_INITIALIZE, bank, INIT_CODE);
+}
+
+/* Write all needed registers of tca6507 */
+
+static void tca6507_work(struct work_struct *work)
+{
+	struct tca6507_chip *tca = container_of(work, struct tca6507_chip,
+						work);
+	struct i2c_client *cl = tca->client;
+	int set;
+	u8 file[TCA6507_REG_CNT];
+	int r;
+
+	spin_lock_irq(&tca->lock);
+	set = tca->reg_set;
+	memcpy(file, tca->reg_file, TCA6507_REG_CNT);
+	tca->reg_set = 0;
+	spin_unlock_irq(&tca->lock);
+
+	for (r = 0; r < TCA6507_REG_CNT; r++)
+		if (set & (1<<r))
+			i2c_smbus_write_byte_data(cl, r, file[r]);
+}
+
+static void led_release(struct tca6507_led *led)
+{
+	/* If led owns any resource, release it. */
+	struct tca6507_chip *tca = led->chip;
+	if (led->bank >= 0) {
+		struct bank *b = tca->bank + led->bank;
+		if (led->blink)
+			b->time_use--;
+		b->level_use--;
+	}
+	led->blink = 0;
+	led->bank = -1;
+}
+
+static int led_prepare(struct tca6507_led *led)
+{
+	/* Assign this led to a bank, configuring that bank if necessary. */
+	int level = TO_LEVEL(led->led_cdev.brightness);
+	struct tca6507_chip *tca = led->chip;
+	int c1, c2;
+	int i;
+	struct bank *b;
+	int need_init = 0;
+
+	led->led_cdev.brightness = TO_BRIGHT(level);
+	if (level == 0) {
+		set_select(tca, led->num, TCA6507_LS_LED_OFF);
+		return 0;
+	}
+
+	if (led->ontime == 0 || led->offtime == 0) {
+		/*
+		 * Just set the brightness, choosing first usable bank.
+		 * If none perfect, choose best.
+		 * Count backwards so we check MASTER bank first
+		 * to avoid wasting a timer.
+		 */
+		int best = -1;/* full-on */
+		int diff = 15-level;
+
+		if (level == 15) {
+			set_select(tca, led->num, TCA6507_LS_LED_ON);
+			return 0;
+		}
+
+		for (i = MASTER; i >= BANK0; i--) {
+			int d;
+			if (tca->bank[i].level == level ||
+			    tca->bank[i].level_use == 0) {
+				best = i;
+				break;
+			}
+			d = abs(level - tca->bank[i].level);
+			if (d < diff) {
+				diff = d;
+				best = i;
+			}
+		}
+		if (best == -1) {
+			/* Best brightness is full-on */
+			set_select(tca, led->num, TCA6507_LS_LED_ON);
+			led->led_cdev.brightness = LED_FULL;
+			return 0;
+		}
+
+		if (!tca->bank[best].level_use)
+			set_level(tca, best, level);
+
+		tca->bank[best].level_use++;
+		led->bank = best;
+		set_select(tca, led->num, bank_source[best]);
+		led->led_cdev.brightness = TO_BRIGHT(tca->bank[best].level);
+		return 0;
+	}
+
+	/*
+	 * We have on/off time so we need to try to allocate a timing bank.
+	 * First check if times are compatible with hardware and give up if
+	 * not.
+	 */
+	if (choose_times(led->ontime, &c1, &c2) < 0)
+		return -EINVAL;
+	if (choose_times(led->offtime, &c1, &c2) < 0)
+		return -EINVAL;
+
+	for (i = BANK0; i <= BANK1; i++) {
+		if (tca->bank[i].level_use == 0)
+			/* not in use - it is ours! */
+			break;
+		if (tca->bank[i].level != level)
+			/* Incompatible level - skip */
+			/* FIX: if timer matches we maybe should consider
+			 * this anyway...
+			 */
+			continue;
+
+		if (tca->bank[i].time_use == 0)
+			/* Timer not in use, and level matches - use it */
+			break;
+
+		if (!(tca->bank[i].on_dflt ||
+		      led->on_dflt ||
+		      tca->bank[i].ontime == led->ontime))
+			/* on time is incompatible */
+			continue;
+
+		if (!(tca->bank[i].off_dflt ||
+		      led->off_dflt ||
+		      tca->bank[i].offtime == led->offtime))
+			/* off time is incompatible */
+			continue;
+
+		/* looks like a suitable match */
+		break;
+	}
+
+	if (i > BANK1)
+		/* Nothing matches - how sad */
+		return -EINVAL;
+
+	b = &tca->bank[i];
+	if (b->level_use == 0)
+		set_level(tca, i, level);
+	b->level_use++;
+	led->bank = i;
+
+	if (b->on_dflt ||
+	    !led->on_dflt ||
+	    b->time_use == 0) {
+		b->ontime = led->ontime;
+		b->on_dflt = led->on_dflt;
+		need_init = 1;
+	}
+
+	if (b->off_dflt ||
+	    !led->off_dflt ||
+	    b->time_use == 0) {
+		b->offtime = led->offtime;
+		b->off_dflt = led->off_dflt;
+		need_init = 1;
+	}
+
+	if (need_init)
+		set_times(tca, i);
+
+	led->ontime = b->ontime;
+	led->offtime = b->offtime;
+
+	b->time_use++;
+	led->blink = 1;
+	led->led_cdev.brightness = TO_BRIGHT(b->level);
+	set_select(tca, led->num, blink_source[i]);
+	return 0;
+}
+
+static int led_assign(struct tca6507_led *led)
+{
+	struct tca6507_chip *tca = led->chip;
+	int err;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tca->lock, flags);
+	led_release(led);
+	err = led_prepare(led);
+	if (err) {
+		/*
+		 * Can only fail on timer setup.  In that case we need to
+		 * re-establish as steady level.
+		 */
+		led->ontime = 0;
+		led->offtime = 0;
+		led_prepare(led);
+	}
+	spin_unlock_irqrestore(&tca->lock, flags);
+
+	if (tca->reg_set)
+		schedule_work(&tca->work);
+	return err;
+}
+
+static void tca6507_brightness_set(struct led_classdev *led_cdev,
+				   enum led_brightness brightness)
+{
+	struct tca6507_led *led = container_of(led_cdev, struct tca6507_led,
+					       led_cdev);
+	led->led_cdev.brightness = brightness;
+	led->ontime = 0;
+	led->offtime = 0;
+	led_assign(led);
+}
+
+static int tca6507_blink_set(struct led_classdev *led_cdev,
+			     unsigned long *delay_on,
+			     unsigned long *delay_off)
+{
+	struct tca6507_led *led = container_of(led_cdev, struct tca6507_led,
+					       led_cdev);
+
+	if (*delay_on == 0)
+		led->on_dflt = 1;
+	else if (delay_on != &led_cdev->blink_delay_on)
+		led->on_dflt = 0;
+	led->ontime = *delay_on;
+
+	if (*delay_off == 0)
+		led->off_dflt = 1;
+	else if (delay_off != &led_cdev->blink_delay_off)
+		led->off_dflt = 0;
+	led->offtime = *delay_off;
+
+	if (led->ontime == 0)
+		led->ontime = 512;
+	if (led->offtime == 0)
+		led->offtime = 512;
+
+	if (led->led_cdev.brightness == LED_OFF)
+		led->led_cdev.brightness = LED_FULL;
+	if (led_assign(led) < 0) {
+		led->ontime = 0;
+		led->offtime = 0;
+		led->led_cdev.brightness = LED_OFF;
+		return -EINVAL;
+	}
+	*delay_on = led->ontime;
+	*delay_off = led->offtime;
+	return 0;
+}
+
+#ifdef CONFIG_GPIOLIB
+static void tca6507_gpio_set_value(struct gpio_chip *gc,
+				   unsigned offset, int val)
+{
+	struct tca6507_chip *tca = container_of(gc, struct tca6507_chip, gpio);
+	unsigned long flags;
+
+	spin_lock_irqsave(&tca->lock, flags);
+	/*
+	 * 'OFF' is floating high, and 'ON' is pulled down, so it has the
+	 * inverse sense of 'val'.
+	 */
+	set_select(tca, tca->gpio_map[offset],
+		   val ? TCA6507_LS_LED_OFF : TCA6507_LS_LED_ON);
+	spin_unlock_irqrestore(&tca->lock, flags);
+	if (tca->reg_set)
+		schedule_work(&tca->work);
+}
+
+static int tca6507_gpio_direction_output(struct gpio_chip *gc,
+					  unsigned offset, int val)
+{
+	tca6507_gpio_set_value(gc, offset, val);
+	return 0;
+}
+
+static int tca6507_probe_gpios(struct i2c_client *client,
+			       struct tca6507_chip *tca,
+			       struct tca6507_platform_data *pdata)
+{
+	int err;
+	int i = 0;
+	int gpios = 0;
+
+	for (i = 0; i < NUM_LEDS; i++)
+		if (pdata->leds.leds[i].name && pdata->leds.leds[i].flags) {
+			/* Configure as a gpio */
+			tca->gpio_name[gpios] = pdata->leds.leds[i].name;
+			tca->gpio_map[gpios] = i;
+			gpios++;
+		}
+
+	if (!gpios)
+		return 0;
+
+	tca->gpio.label = "gpio-tca6507";
+	tca->gpio.names = tca->gpio_name;
+	tca->gpio.ngpio = gpios;
+	tca->gpio.base = pdata->gpio_base;
+	tca->gpio.owner = THIS_MODULE;
+	tca->gpio.direction_output = tca6507_gpio_direction_output;
+	tca->gpio.set = tca6507_gpio_set_value;
+	tca->gpio.dev = &client->dev;
+	err = gpiochip_add(&tca->gpio);
+	if (err) {
+		tca->gpio.ngpio = 0;
+		return err;
+	}
+	if (pdata->setup)
+		pdata->setup(tca->gpio.base, tca->gpio.ngpio);
+	return 0;
+}
+
+/* Unregister the gpio_chip, if one was registered; log only real failures */
+static void tca6507_remove_gpio(struct tca6507_chip *tca)
+{
+	int err = tca->gpio.ngpio ? gpiochip_remove(&tca->gpio) : 0;
+	if (err)
+		dev_err(&tca->client->dev, "%s failed, %d\n",
+			"gpiochip_remove()", err);
+}
+#else /* CONFIG_GPIOLIB */
+static int tca6507_probe_gpios(struct i2c_client *client,
+			       struct tca6507_chip *tca,
+			       struct tca6507_platform_data *pdata)
+{
+	return 0;
+}
+static void tca6507_remove_gpio(struct tca6507_chip *tca)
+{
+}
+#endif /* CONFIG_GPIOLIB */
+
+/* Probe a TCA6507: register LEDs/GPIOs and queue the initial register write */
+static int __devinit tca6507_probe(struct i2c_client *client,
+				   const struct i2c_device_id *id)
+{
+	struct tca6507_chip *tca;
+	struct i2c_adapter *adapter;
+	struct tca6507_platform_data *pdata;
+	int err, i = 0;
+
+	adapter = to_i2c_adapter(client->dev.parent);
+	pdata = client->dev.platform_data;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C))
+		return -EIO;
+
+	if (!pdata || pdata->leds.num_leds != NUM_LEDS) {
+		dev_err(&client->dev, "Need %d entries in platform-data list\n",
+			NUM_LEDS);
+		return -ENODEV;
+	}
+	/* The 'exit' path dereferences tca: return directly on failure */
+	tca = kzalloc(sizeof(*tca), GFP_KERNEL);
+	if (!tca)
+		return -ENOMEM;
+
+	tca->client = client;
+	INIT_WORK(&tca->work, tca6507_work);
+	spin_lock_init(&tca->lock);
+	i2c_set_clientdata(client, tca);
+
+	for (i = 0; i < NUM_LEDS; i++) {
+		struct tca6507_led *l = tca->leds + i;
+
+		l->chip = tca;
+		l->num = i;
+		if (pdata->leds.leds[i].name && !pdata->leds.leds[i].flags) {
+			l->led_cdev.name = pdata->leds.leds[i].name;
+			l->led_cdev.default_trigger
+				= pdata->leds.leds[i].default_trigger;
+			l->led_cdev.brightness_set = tca6507_brightness_set;
+			l->led_cdev.blink_set = tca6507_blink_set;
+			l->bank = -1;
+			err = led_classdev_register(&client->dev,
+						    &l->led_cdev);
+			if (err < 0)
+				goto exit;
+		}
+	}
+	err = tca6507_probe_gpios(client, tca, pdata);
+	if (err)
+		goto exit;
+	/* set all registers to known state - zero */
+	tca->reg_set = 0x7f;
+	schedule_work(&tca->work);
+
+	return 0;
+exit:
+	while (i--)
+		if (tca->leds[i].led_cdev.name)
+			led_classdev_unregister(&tca->leds[i].led_cdev);
+	cancel_work_sync(&tca->work);
+	i2c_set_clientdata(client, NULL);
+	kfree(tca);
+	return err;
+}
+
+static int __devexit tca6507_remove(struct i2c_client *client)
+{
+	int i;
+	struct tca6507_chip *tca = i2c_get_clientdata(client);
+	struct tca6507_led *tca_leds = tca->leds;
+
+	for (i = 0; i < NUM_LEDS; i++) {
+		if (tca_leds[i].led_cdev.name)
+			led_classdev_unregister(&tca_leds[i].led_cdev);
+	}
+	tca6507_remove_gpio(tca);
+	cancel_work_sync(&tca->work);
+	i2c_set_clientdata(client, NULL);
+	kfree(tca);
+
+	return 0;
+}
+
+static struct i2c_driver tca6507_driver = {
+	.driver   = {
+		.name    = "leds-tca6507",
+		.owner   = THIS_MODULE,
+	},
+	.probe    = tca6507_probe,
+	.remove   = __devexit_p(tca6507_remove),
+	.id_table = tca6507_id,
+};
+
+static int __init tca6507_leds_init(void)
+{
+	return i2c_add_driver(&tca6507_driver);
+}
+
+static void __exit tca6507_leds_exit(void)
+{
+	i2c_del_driver(&tca6507_driver);
+}
+
+module_init(tca6507_leds_init);
+module_exit(tca6507_leds_exit);
+
+MODULE_AUTHOR("NeilBrown <neilb@suse.de>");
+MODULE_DESCRIPTION("TCA6507 LED/GPO driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/leds-tca6507.h b/include/linux/leds-tca6507.h
new file mode 100644
index 000000000000..dcabf4fa2aef
--- /dev/null
+++ b/include/linux/leds-tca6507.h
@@ -0,0 +1,34 @@
+/*
+ * TCA6507 LED chip driver.
+ *
+ * Copyright (C) 2011 Neil Brown <neil@brown.name>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef __LINUX_TCA6507_H
+#define __LINUX_TCA6507_H
+#include <linux/leds.h>
+
+struct tca6507_platform_data {
+	struct led_platform_data leds;
+#ifdef CONFIG_GPIOLIB
+	int gpio_base;
+	void (*setup)(unsigned gpio_base, unsigned ngpio);
+#endif
+};
+
+#define	TCA6507_MAKE_GPIO 1
+#endif /* __LINUX_TCA6507_H */
-- 
cgit v1.2.3


From 5e6292c0f28f03dfdb8ea3d685f0b838a23bfba4 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Tue, 10 Jan 2012 15:11:17 -0800
Subject: signal: add block_sigmask() for adding sigmask to current->blocked

Abstract the code sequence for adding a signal handler's sa_mask to
current->blocked because the sequence is identical for all architectures.
Furthermore, in the past some architectures actually got this code wrong,
so introduce a wrapper that all architectures can use.

Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/signal.c |  6 +-----
 include/linux/signal.h   |  1 +
 kernel/signal.c          | 21 +++++++++++++++++++++
 3 files changed, 23 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 54ddaeb221c1..46a01bdc27e2 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -682,7 +682,6 @@ static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 		struct pt_regs *regs)
 {
-	sigset_t blocked;
 	int ret;
 
 	/* Are we from a system call? */
@@ -733,10 +732,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	 */
 	regs->flags &= ~X86_EFLAGS_TF;
 
-	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
-	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&blocked, sig);
-	set_current_blocked(&blocked);
+	block_sigmask(ka, sig);
 
 	tracehook_signal_handler(sig, info, ka, regs,
 				 test_thread_flag(TIF_SINGLESTEP));
diff --git a/include/linux/signal.h b/include/linux/signal.h
index a822300a253b..7987ce74874b 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -254,6 +254,7 @@ extern void set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
 
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
+extern void block_sigmask(struct k_sigaction *ka, int signr);
 extern void exit_signals(struct task_struct *tsk);
 
 extern struct kmem_cache *sighand_cachep;
diff --git a/kernel/signal.c b/kernel/signal.c
index bb0efa5705ed..d532f1709fbf 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2318,6 +2318,27 @@ relock:
 	return signr;
 }
 
+/**
+ * block_sigmask - add @ka's signal mask to current->blocked
+ * @ka: action for @signr
+ * @signr: signal that has been successfully delivered
+ *
+ * This function should be called when a signal has successfully been
+ * delivered. It adds the mask of signals for @ka to current->blocked
+ * so that they are blocked during the execution of the signal
+ * handler. In addition, @signr will be blocked unless %SA_NODEFER is
+ * set in @ka->sa.sa_flags.
+ */
+void block_sigmask(struct k_sigaction *ka, int signr)
+{
+	sigset_t blocked;
+
+	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
+		sigaddset(&blocked, signr);
+	set_current_blocked(&blocked);
+}
+
 /*
  * It could be that complete_signal() picked us to notify about the
  * group-wide signal. Other threads should be notified now to take
-- 
cgit v1.2.3


From 7773fbc54182a90cd248656619c7d33859e5f91d Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Tue, 10 Jan 2012 15:11:20 -0800
Subject: procfs: make proc_get_link to use dentry instead of inode

Prepare the ground for the next "map_files" patch which needs a name of a
link file to analyse.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vasiliy Kulikov <segoon@openwall.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c          | 20 ++++++++++----------
 include/linux/proc_fs.h |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1aab5fe05a1b..e31d95055c67 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -166,9 +166,9 @@ static int get_task_root(struct task_struct *task, struct path *root)
 	return result;
 }
 
-static int proc_cwd_link(struct inode *inode, struct path *path)
+static int proc_cwd_link(struct dentry *dentry, struct path *path)
 {
-	struct task_struct *task = get_proc_task(inode);
+	struct task_struct *task = get_proc_task(dentry->d_inode);
 	int result = -ENOENT;
 
 	if (task) {
@@ -183,9 +183,9 @@ static int proc_cwd_link(struct inode *inode, struct path *path)
 	return result;
 }
 
-static int proc_root_link(struct inode *inode, struct path *path)
+static int proc_root_link(struct dentry *dentry, struct path *path)
 {
-	struct task_struct *task = get_proc_task(inode);
+	struct task_struct *task = get_proc_task(dentry->d_inode);
 	int result = -ENOENT;
 
 	if (task) {
@@ -1456,13 +1456,13 @@ static const struct file_operations proc_pid_set_comm_operations = {
 	.release	= single_release,
 };
 
-static int proc_exe_link(struct inode *inode, struct path *exe_path)
+static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
 {
 	struct task_struct *task;
 	struct mm_struct *mm;
 	struct file *exe_file;
 
-	task = get_proc_task(inode);
+	task = get_proc_task(dentry->d_inode);
 	if (!task)
 		return -ENOENT;
 	mm = get_task_mm(task);
@@ -1492,7 +1492,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 	if (!proc_fd_access_allowed(inode))
 		goto out;
 
-	error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
+	error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path);
 out:
 	return ERR_PTR(error);
 }
@@ -1531,7 +1531,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
 	if (!proc_fd_access_allowed(inode))
 		goto out;
 
-	error = PROC_I(inode)->op.proc_get_link(inode, &path);
+	error = PROC_I(inode)->op.proc_get_link(dentry, &path);
 	if (error)
 		goto out;
 
@@ -1823,9 +1823,9 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
 	return -ENOENT;
 }
 
-static int proc_fd_link(struct inode *inode, struct path *path)
+static int proc_fd_link(struct dentry *dentry, struct path *path)
 {
-	return proc_fd_info(inode, path, NULL);
+	return proc_fd_info(dentry->d_inode, path, NULL);
 }
 
 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 6d9e575519cc..85c507306239 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -253,7 +253,7 @@ extern const struct proc_ns_operations utsns_operations;
 extern const struct proc_ns_operations ipcns_operations;
 
 union proc_op {
-	int (*proc_get_link)(struct inode *, struct path *);
+	int (*proc_get_link)(struct dentry *, struct path *);
 	int (*proc_read)(struct task_struct *task, char *page);
 	int (*proc_show)(struct seq_file *m,
 		struct pid_namespace *ns, struct pid *pid,
-- 
cgit v1.2.3


From 640708a2cff7f81e246243b0073c66e6ece7e53e Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 10 Jan 2012 15:11:23 -0800
Subject: procfs: introduce the /proc/<pid>/map_files/ directory

This one behaves similarly to the /proc/<pid>/fd/ one - it contains
symlinks one for each mapping with file, the name of a symlink is
"vma->vm_start-vma->vm_end", the target is the file.  Opening a symlink
results in a file that points to exactly the same inode as the vma's one.

For example the ls -l of some arbitrary /proc/<pid>/map_files/

 | lr-x------ 1 root root 64 Aug 26 06:40 7f8f80403000-7f8f80404000 -> /lib64/libc-2.5.so
 | lr-x------ 1 root root 64 Aug 26 06:40 7f8f8061e000-7f8f80620000 -> /lib64/libselinux.so.1
 | lr-x------ 1 root root 64 Aug 26 06:40 7f8f80826000-7f8f80827000 -> /lib64/libacl.so.1.1.0
 | lr-x------ 1 root root 64 Aug 26 06:40 7f8f80a2f000-7f8f80a30000 -> /lib64/librt-2.5.so
 | lr-x------ 1 root root 64 Aug 26 06:40 7f8f80a30000-7f8f80a4c000 -> /lib64/ld-2.5.so

This *helps* checkpointing process in three ways:

1. When dumping a task mappings we do know exact file that is mapped
   by particular region.  We do this by opening
   /proc/$pid/map_files/$address symlink the way we do with file
   descriptors.

2. This also helps in determining which anonymous shared mappings are
   shared with each other by comparing the inodes of them.

3. When restoring a set of processes in which two of them share a
   mapping, we map the memory in the 1st one and then open its
   /proc/$pid/map_files/$address file and map it in the 2nd task.

Using /proc/$pid/maps for this is quite inconvenient since it requires
repeatedly re-reading and re-parsing this text file, which slows down the
restore procedure significantly.  Also, as pointed out in (3), it is much
easier to use a top-level shared mapping in children as
/proc/$pid/map_files/$address when needed.

[akpm@linux-foundation.org: coding-style fixes]
[gorcunov@openvz.org: make map_files depend on CHECKPOINT_RESTORE]
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Reviewed-by: Vasiliy Kulikov <segoon@openwall.com>
Reviewed-by: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Tejun Heo <tj@kernel.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c     | 355 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h |  12 ++
 2 files changed, 367 insertions(+)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index e31d95055c67..4d755fed3ecb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -83,6 +83,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/fs_struct.h>
 #include <linux/slab.h>
+#include <linux/flex_array.h>
 #ifdef CONFIG_HARDWALL
 #include <asm/hardwall.h>
 #endif
@@ -134,6 +135,8 @@ struct pid_entry {
 		NULL, &proc_single_file_operations,	\
 		{ .proc_show = show } )
 
+static int proc_fd_permission(struct inode *inode, int mask);
+
 /*
  * Count the number of hardlinks for the pid_entry table, excluding the .
  * and .. links.
@@ -2046,6 +2049,355 @@ static const struct file_operations proc_fd_operations = {
 	.llseek		= default_llseek,
 };
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+
+/*
+ * dname_to_vma_addr - maps a dentry name into two unsigned longs
+ * which represent vma start and end addresses.
+ */
+static int dname_to_vma_addr(struct dentry *dentry,
+			     unsigned long *start, unsigned long *end)
+{
+	if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+	unsigned long vm_start, vm_end;
+	bool exact_vma_exists = false;
+	struct mm_struct *mm = NULL;
+	struct task_struct *task;
+	const struct cred *cred;
+	struct inode *inode;
+	int status = 0;
+
+	if (nd && nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	if (!capable(CAP_SYS_ADMIN)) {
+		status = -EACCES;
+		goto out_notask;
+	}
+
+	inode = dentry->d_inode;
+	task = get_proc_task(inode);
+	if (!task)
+		goto out_notask;
+
+	if (!ptrace_may_access(task, PTRACE_MODE_READ))
+		goto out;
+
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out;
+
+	if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
+		down_read(&mm->mmap_sem);
+		exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end);
+		up_read(&mm->mmap_sem);
+	}
+
+	mmput(mm);
+
+	if (exact_vma_exists) {
+		if (task_dumpable(task)) {
+			rcu_read_lock();
+			cred = __task_cred(task);
+			inode->i_uid = cred->euid;
+			inode->i_gid = cred->egid;
+			rcu_read_unlock();
+		} else {
+			inode->i_uid = 0;
+			inode->i_gid = 0;
+		}
+		security_task_to_inode(task, inode);
+		status = 1;
+	}
+
+out:
+	put_task_struct(task);
+
+out_notask:
+	if (status <= 0)
+		d_drop(dentry);
+
+	return status;
+}
+
+static const struct dentry_operations tid_map_files_dentry_operations = {
+	.d_revalidate	= map_files_d_revalidate,
+	.d_delete	= pid_delete_dentry,
+};
+
+static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
+{
+	unsigned long vm_start, vm_end;
+	struct vm_area_struct *vma;
+	struct task_struct *task;
+	struct mm_struct *mm;
+	int rc;
+
+	rc = -ENOENT;
+	task = get_proc_task(dentry->d_inode);
+	if (!task)
+		goto out;
+
+	mm = get_task_mm(task);
+	put_task_struct(task);
+	if (!mm)
+		goto out;
+
+	rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
+	if (rc)
+		goto out_mmput;
+
+	down_read(&mm->mmap_sem);
+	vma = find_exact_vma(mm, vm_start, vm_end);
+	if (vma && vma->vm_file) {
+		*path = vma->vm_file->f_path;
+		path_get(path);
+		rc = 0;
+	}
+	up_read(&mm->mmap_sem);
+
+out_mmput:
+	mmput(mm);
+out:
+	return rc;
+}
+
+struct map_files_info {
+	struct file	*file;
+	unsigned long	len;
+	unsigned char	name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
+};
+
+static struct dentry *
+proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
+			   struct task_struct *task, const void *ptr)
+{
+	const struct file *file = ptr;
+	struct proc_inode *ei;
+	struct inode *inode;
+
+	if (!file)
+		return ERR_PTR(-ENOENT);
+
+	inode = proc_pid_make_inode(dir->i_sb, task);
+	if (!inode)
+		return ERR_PTR(-ENOENT);
+
+	ei = PROC_I(inode);
+	ei->op.proc_get_link = proc_map_files_get_link;
+
+	inode->i_op = &proc_pid_link_inode_operations;
+	inode->i_size = 64;
+	inode->i_mode = S_IFLNK;
+
+	if (file->f_mode & FMODE_READ)
+		inode->i_mode |= S_IRUSR;
+	if (file->f_mode & FMODE_WRITE)
+		inode->i_mode |= S_IWUSR;
+
+	d_set_d_op(dentry, &tid_map_files_dentry_operations);
+	d_add(dentry, inode);
+
+	return NULL;
+}
+
+static struct dentry *proc_map_files_lookup(struct inode *dir,
+		struct dentry *dentry, struct nameidata *nd)
+{
+	unsigned long vm_start, vm_end;
+	struct vm_area_struct *vma;
+	struct task_struct *task;
+	struct dentry *result;
+	struct mm_struct *mm;
+
+	result = ERR_PTR(-EACCES);
+	if (!capable(CAP_SYS_ADMIN))
+		goto out;
+
+	result = ERR_PTR(-ENOENT);
+	task = get_proc_task(dir);
+	if (!task)
+		goto out;
+
+	result = ERR_PTR(-EACCES);
+	if (lock_trace(task))
+		goto out_put_task;
+
+	result = ERR_PTR(-ENOENT);
+	if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
+		goto out_unlock;
+
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out_unlock;
+
+	down_read(&mm->mmap_sem);
+	vma = find_exact_vma(mm, vm_start, vm_end);
+	if (!vma)
+		goto out_no_vma;
+
+	result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file);
+
+out_no_vma:
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+out_unlock:
+	unlock_trace(task);
+out_put_task:
+	put_task_struct(task);
+out:
+	return result;
+}
+
+static const struct inode_operations proc_map_files_inode_operations = {
+	.lookup		= proc_map_files_lookup,
+	.permission	= proc_fd_permission,
+	.setattr	= proc_setattr,
+};
+
+static int
+proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct dentry *dentry = filp->f_path.dentry;
+	struct inode *inode = dentry->d_inode;
+	struct vm_area_struct *vma;
+	struct task_struct *task;
+	struct mm_struct *mm;
+	ino_t ino;
+	int ret;
+
+	ret = -EACCES;
+	if (!capable(CAP_SYS_ADMIN))
+		goto out;
+
+	ret = -ENOENT;
+	task = get_proc_task(inode);
+	if (!task)
+		goto out;
+
+	ret = -EACCES;
+	if (lock_trace(task))
+		goto out_put_task;
+
+	ret = 0;
+	switch (filp->f_pos) {
+	case 0:
+		ino = inode->i_ino;
+		if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0)
+			goto out_unlock;
+		filp->f_pos++;
+	case 1:
+		ino = parent_ino(dentry);
+		if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
+			goto out_unlock;
+		filp->f_pos++;
+	default:
+	{
+		unsigned long nr_files, pos, i;
+		struct flex_array *fa = NULL;
+		struct map_files_info info;
+		struct map_files_info *p;
+
+		mm = get_task_mm(task);
+		if (!mm)
+			goto out_unlock;
+		down_read(&mm->mmap_sem);
+
+		nr_files = 0;
+
+		/*
+		 * We need two passes here:
+		 *
+		 *  1) Collect vmas of mapped files with mmap_sem taken
+		 *  2) Release mmap_sem and instantiate entries
+		 *
+		 * otherwise lockdep complains, since the filldir()
+		 * routine might need to take mmap_sem in might_fault().
+		 */
+
+		for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
+			if (vma->vm_file && ++pos > filp->f_pos)
+				nr_files++;
+		}
+
+		if (nr_files) {
+			fa = flex_array_alloc(sizeof(info), nr_files,
+						GFP_KERNEL);
+			if (!fa || flex_array_prealloc(fa, 0, nr_files,
+							GFP_KERNEL)) {
+				ret = -ENOMEM;
+				if (fa)
+					flex_array_free(fa);
+				up_read(&mm->mmap_sem);
+				mmput(mm);
+				goto out_unlock;
+			}
+			for (i = 0, vma = mm->mmap, pos = 2; vma;
+					vma = vma->vm_next) {
+				if (!vma->vm_file)
+					continue;
+				if (++pos <= filp->f_pos)
+					continue;
+
+				get_file(vma->vm_file);
+				info.file = vma->vm_file;
+				info.len = snprintf(info.name,
+						sizeof(info.name), "%lx-%lx",
+						vma->vm_start, vma->vm_end);
+				if (flex_array_put(fa, i++, &info, GFP_KERNEL))
+					BUG();
+			}
+		}
+		up_read(&mm->mmap_sem);
+
+		for (i = 0; i < nr_files; i++) {
+			p = flex_array_get(fa, i);
+			ret = proc_fill_cache(filp, dirent, filldir,
+					      p->name, p->len,
+					      proc_map_files_instantiate,
+					      task, p->file);
+			if (ret)
+				break;
+			filp->f_pos++;
+			fput(p->file);
+		}
+		for (; i < nr_files; i++) {
+			/*
+			 * In case of error don't forget
+			 * to put rest of file refs.
+			 */
+			p = flex_array_get(fa, i);
+			fput(p->file);
+		}
+		if (fa)
+			flex_array_free(fa);
+		mmput(mm);
+	}
+	}
+
+out_unlock:
+	unlock_trace(task);
+out_put_task:
+	put_task_struct(task);
+out:
+	return ret;
+}
+
+static const struct file_operations proc_map_files_operations = {
+	.read		= generic_read_dir,
+	.readdir	= proc_map_files_readdir,
+	.llseek		= default_llseek,
+};
+
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+
 /*
  * /proc/pid/fd needs a special permission handler so that a process can still
  * access /proc/self/fd after it has executed a setuid().
@@ -2661,6 +3013,9 @@ static const struct inode_operations proc_task_inode_operations;
 static const struct pid_entry tgid_base_stuff[] = {
 	DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
 	DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	DIR("map_files",  S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
+#endif
 	DIR("fdinfo",     S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
 	DIR("ns",	  S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
 #ifdef CONFIG_NET
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5568553a41fd..6eba2cc016c9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1482,6 +1482,18 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
 	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 }
 
+/* Look up the first VMA which exactly matches the interval vm_start ... vm_end */
+static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
+				unsigned long vm_start, unsigned long vm_end)
+{
+	struct vm_area_struct *vma = find_vma(mm, vm_start);
+
+	if (vma && (vma->vm_start != vm_start || vma->vm_end != vm_end))
+		vma = NULL;
+
+	return vma;
+}
+
 #ifdef CONFIG_MMU
 pgprot_t vm_get_page_prot(unsigned long vm_flags);
 #else
-- 
cgit v1.2.3


From 0499680a42141d86417a8fbaa8c8db806bea1201 Mon Sep 17 00:00:00 2001
From: Vasiliy Kulikov <segooon@gmail.com>
Date: Tue, 10 Jan 2012 15:11:31 -0800
Subject: procfs: add hidepid= and gid= mount options

Add support for mount options to restrict access to /proc/PID/
directories.  The default backward-compatible "relaxed" behaviour is left
untouched.

The first mount option is called "hidepid" and its value defines how much
info about processes we want to be available for non-owners:

hidepid=0 (default) means the old behavior - anybody may read all
world-readable /proc/PID/* files.

hidepid=1 means users may not access any /proc/<pid>/ directories, but
their own.  Sensitive files like cmdline, sched*, status are now protected
against other users.  As permission checking is done in proc_pid_permission()
and files' permissions are left untouched, programs expecting specific
files' modes are not confused.

hidepid=2 means hidepid=1 plus all /proc/PID/ will be invisible to other
users.  It doesn't mean that it hides whether a process exists (it can be
learned by other means, e.g.  by kill -0 $PID), but it hides process' euid
and egid.  It complicates an intruder's task of gathering info about running
processes, whether some daemon runs with elevated privileges, whether
another user runs some sensitive program, whether other users run any
program at all, etc.

gid=XXX defines a group that will be able to gather all processes' info
(as in hidepid=0 mode).  This group should be used instead of putting
nonroot user in sudoers file or something.  However, untrusted users (like
daemons, etc.) which are not supposed to monitor the tasks in the whole
system should not be added to the group.

hidepid=1 or higher is designed to restrict access to procfs files, which
might reveal some sensitive private information like precise keystrokes
timings:

http://www.openwall.com/lists/oss-security/2011/11/05/3

hidepid=1/2 doesn't break monitoring userspace tools.  ps, top, pgrep, and
conky gracefully handle EPERM/ENOENT and behave as if the current user is
the only user running processes.  pstree shows the process subtree which
contains "pstree" process.

Note: the patch doesn't deal with setuid/setgid issues of keeping
preopened descriptors of procfs files (like
https://lkml.org/lkml/2011/2/7/368).  We rely on the fact that leaked
information like the scheduling counters of setuid apps doesn't threaten
anybody's privacy - only the user who started the setuid program may read
the counters.

Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Greg KH <greg@kroah.com>
Cc: Theodore Tso <tytso@MIT.EDU>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: James Morris <jmorris@namei.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt | 39 +++++++++++++++++++++
 fs/proc/base.c                     | 69 +++++++++++++++++++++++++++++++++++++-
 fs/proc/inode.c                    |  8 +++++
 fs/proc/root.c                     | 21 ++++++++++--
 include/linux/pid_namespace.h      |  2 ++
 5 files changed, 135 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 0ec91f03422e..12fee132fbe2 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -41,6 +41,8 @@ Table of Contents
   3.5	/proc/<pid>/mountinfo - Information about mounts
   3.6	/proc/<pid>/comm  & /proc/<pid>/task/<tid>/comm
 
+  4	Configuring procfs
+  4.1	Mount options
 
 ------------------------------------------------------------------------------
 Preface
@@ -1542,3 +1544,40 @@ a task to set its own or one of its thread siblings comm value. The comm value
 is limited in size compared to the cmdline value, so writing anything longer
 then the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated
 comm value.
+
+
+------------------------------------------------------------------------------
+Configuring procfs
+------------------------------------------------------------------------------
+
+4.1	Mount options
+---------------------
+
+The following mount options are supported:
+
+	hidepid=	Set /proc/<pid>/ access mode.
+	gid=		Set the group authorized to learn processes information.
+
+hidepid=0 means classic mode - everybody may access all /proc/<pid>/ directories
+(default).
+
+hidepid=1 means users may not access any /proc/<pid>/ directories but their
+own.  Sensitive files like cmdline, sched*, status are now protected against
+other users.  This makes it impossible to learn whether any user runs
+specific program (given the program doesn't reveal itself by its behaviour).
+As an additional bonus, as /proc/<pid>/cmdline is inaccessible to other users,
+poorly written programs passing sensitive information via program arguments are
+now protected against local eavesdroppers.
+
+hidepid=2 means hidepid=1 plus all /proc/<pid>/ will be fully invisible to other
+users.  It doesn't mean that it hides a fact whether a process with a specific
+pid value exists (it can be learned by other means, e.g. by "kill -0 $PID"),
+but it hides process' uid and gid, which may be learned by stat()'ing
+/proc/<pid>/ otherwise.  It greatly complicates an intruder's task of gathering
+information about running processes, whether some daemon runs with elevated
+privileges, whether other user runs some sensitive program, whether other users
+run any program at all, etc.
+
+gid= defines a group authorized to learn processes information otherwise
+prohibited by hidepid=.  If you use some daemon like identd which needs to learn
+information about processes, just add identd to this group.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4d755fed3ecb..8173dfd89cb2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -631,6 +631,50 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr)
 	return 0;
 }
 
+/*
+ * May current process learn task's sched/cmdline info (for hide_pid_min=1)
+ * or euid/egid (for hide_pid_min=2)?
+ */
+static bool has_pid_permissions(struct pid_namespace *pid,
+				 struct task_struct *task,
+				 int hide_pid_min)
+{
+	if (pid->hide_pid < hide_pid_min)
+		return true;
+	if (in_group_p(pid->pid_gid))
+		return true;
+	return ptrace_may_access(task, PTRACE_MODE_READ);
+}
+
+
+static int proc_pid_permission(struct inode *inode, int mask)
+{
+	struct pid_namespace *pid = inode->i_sb->s_fs_info;
+	struct task_struct *task;
+	bool has_perms;
+
+	task = get_proc_task(inode);
+	has_perms = has_pid_permissions(pid, task, 1);
+	put_task_struct(task);
+
+	if (!has_perms) {
+		if (pid->hide_pid == 2) {
+			/*
+			 * Let's make getdents(), stat(), and open()
+			 * consistent with each other.  If a process
+			 * may not stat() a file, it shouldn't be seen
+			 * in procfs at all.
+			 */
+			return -ENOENT;
+		}
+
+		return -EPERM;
+	}
+	return generic_permission(inode, mask);
+}
+
+
+
 static const struct inode_operations proc_def_inode_operations = {
 	.setattr	= proc_setattr,
 };
@@ -1615,6 +1659,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	struct inode *inode = dentry->d_inode;
 	struct task_struct *task;
 	const struct cred *cred;
+	struct pid_namespace *pid = dentry->d_sb->s_fs_info;
 
 	generic_fillattr(inode, stat);
 
@@ -1623,6 +1668,14 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	stat->gid = 0;
 	task = pid_task(proc_pid(inode), PIDTYPE_PID);
 	if (task) {
+		if (!has_pid_permissions(pid, task, 2)) {
+			rcu_read_unlock();
+			/*
+			 * This doesn't prevent learning whether PID exists,
+			 * it only makes getattr() consistent with readdir().
+			 */
+			return -ENOENT;
+		}
 		if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
 		    task_dumpable(task)) {
 			cred = __task_cred(task);
@@ -3119,6 +3172,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
 	.lookup		= proc_tgid_base_lookup,
 	.getattr	= pid_getattr,
 	.setattr	= proc_setattr,
+	.permission	= proc_pid_permission,
 };
 
 static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
@@ -3322,6 +3376,12 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
 				proc_pid_instantiate, iter.task, NULL);
 }
 
+static int fake_filldir(void *buf, const char *name, int namelen,
+			loff_t offset, u64 ino, unsigned d_type)
+{
+	return 0;
+}
+
 /* for the /proc/ directory itself, after non-process stuff has been done */
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
@@ -3329,6 +3389,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	struct task_struct *reaper;
 	struct tgid_iter iter;
 	struct pid_namespace *ns;
+	filldir_t __filldir;
 
 	if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
 		goto out_no_task;
@@ -3350,8 +3411,13 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	for (iter = next_tgid(ns, iter);
 	     iter.task;
 	     iter.tgid += 1, iter = next_tgid(ns, iter)) {
+		if (has_pid_permissions(ns, iter.task, 2))
+			__filldir = filldir;
+		else
+			__filldir = fake_filldir;
+
 		filp->f_pos = iter.tgid + TGID_OFFSET;
-		if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) {
+		if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) {
 			put_task_struct(iter.task);
 			goto out;
 		}
@@ -3686,6 +3752,7 @@ static const struct inode_operations proc_task_inode_operations = {
 	.lookup		= proc_task_lookup,
 	.getattr	= proc_task_getattr,
 	.setattr	= proc_setattr,
+	.permission	= proc_pid_permission,
 };
 
 static const struct file_operations proc_task_operations = {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 27c762f34870..84fd3235a590 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -106,6 +106,14 @@ void __init proc_init_inodecache(void)
 
 static int proc_show_options(struct seq_file *seq, struct dentry *root)
 {
+	struct super_block *sb = root->d_sb;
+	struct pid_namespace *pid = sb->s_fs_info;
+
+	if (pid->pid_gid)
+		seq_printf(seq, ",gid=%lu", (unsigned long)pid->pid_gid);
+	if (pid->hide_pid != 0)
+		seq_printf(seq, ",hidepid=%u", pid->hide_pid);
+
 	return 0;
 }
 
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 6a8ac1d361a9..46a15d8a29ca 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -38,10 +38,12 @@ static int proc_set_super(struct super_block *sb, void *data)
 }
 
 enum {
-	Opt_err,
+	Opt_gid, Opt_hidepid, Opt_err,
 };
 
 static const match_table_t tokens = {
+	{Opt_hidepid, "hidepid=%u"},
+	{Opt_gid, "gid=%u"},
 	{Opt_err, NULL},
 };
 
@@ -49,8 +51,7 @@ static int proc_parse_options(char *options, struct pid_namespace *pid)
 {
 	char *p;
 	substring_t args[MAX_OPT_ARGS];
-
-	pr_debug("proc: options = %s\n", options);
+	int option;
 
 	if (!options)
 		return 1;
@@ -63,6 +64,20 @@ static int proc_parse_options(char *options, struct pid_namespace *pid)
 		args[0].to = args[0].from = 0;
 		token = match_token(p, tokens, args);
 		switch (token) {
+		case Opt_gid:
+			if (match_int(&args[0], &option))
+				return 0;
+			pid->pid_gid = option;
+			break;
+		case Opt_hidepid:
+			if (match_int(&args[0], &option))
+				return 0;
+			if (option < 0 || option > 2) {
+				pr_err("proc: hidepid value must be between 0 and 2.\n");
+				return 0;
+			}
+			pid->hide_pid = option;
+			break;
 		default:
 			pr_err("proc: unrecognized mount option \"%s\" "
 			       "or missing value\n", p);
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 38d10326246a..e7cf6669ac34 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -30,6 +30,8 @@ struct pid_namespace {
 #ifdef CONFIG_BSD_PROCESS_ACCT
 	struct bsd_acct_struct *bacct;
 #endif
+	gid_t pid_gid;
+	int hide_pid;
 };
 
 extern struct pid_namespace init_pid_ns;
-- 
cgit v1.2.3


From b196be89cdc14a88cc637cdad845a75c5886c82d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 10 Jan 2012 15:11:35 -0800
Subject: workqueue: make alloc_workqueue() take printf fmt and args for name

alloc_workqueue() currently expects the passed in @name pointer to remain
accessible.  This is inconvenient and a bit silly given that the whole wq
is being dynamically allocated.  This patch updates alloc_workqueue() and
friends to take printf format string instead of opaque string and matching
varargs at the end.  The name is allocated together with the wq and
formatted.

alloc_ordered_workqueue() is converted to a macro to unify varargs
handling with alloc_workqueue(), and, while at it, add comment to
alloc_workqueue().

None of the current in-kernel users pass in string with '%' as constant
name and this change shouldn't cause any problem.

[akpm@linux-foundation.org: use __printf]
Signed-off-by: Tejun Heo <tj@kernel.org>
Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/workqueue.h | 47 +++++++++++++++++++++++++++++++----------------
 kernel/workqueue.c        | 32 ++++++++++++++++++++++----------
 2 files changed, 53 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 0d556deb497b..eb8b9f15f2e0 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -297,32 +297,50 @@ extern struct workqueue_struct *system_unbound_wq;
 extern struct workqueue_struct *system_freezable_wq;
 
 extern struct workqueue_struct *
-__alloc_workqueue_key(const char *name, unsigned int flags, int max_active,
-		      struct lock_class_key *key, const char *lock_name);
+__alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
+	struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6);
 
+/**
+ * alloc_workqueue - allocate a workqueue
+ * @fmt: printf format for the name of the workqueue
+ * @flags: WQ_* flags
+ * @max_active: max in-flight work items, 0 for default
+ * @args: args for @fmt
+ *
+ * Allocate a workqueue with the specified parameters.  For detailed
+ * information on WQ_* flags, please refer to Documentation/workqueue.txt.
+ *
+ * The __lock_name macro dance is to guarantee that single lock_class_key
+ * doesn't end up with different names, which isn't allowed by lockdep.
+ *
+ * RETURNS:
+ * Pointer to the allocated workqueue on success, %NULL on failure.
+ */
 #ifdef CONFIG_LOCKDEP
-#define alloc_workqueue(name, flags, max_active)		\
+#define alloc_workqueue(fmt, flags, max_active, args...)	\
 ({								\
 	static struct lock_class_key __key;			\
 	const char *__lock_name;				\
 								\
-	if (__builtin_constant_p(name))				\
-		__lock_name = (name);				\
+	if (__builtin_constant_p(fmt))				\
+		__lock_name = (fmt);				\
 	else							\
-		__lock_name = #name;				\
+		__lock_name = #fmt;				\
 								\
-	__alloc_workqueue_key((name), (flags), (max_active),	\
-			      &__key, __lock_name);		\
+	__alloc_workqueue_key((fmt), (flags), (max_active),	\
+			      &__key, __lock_name, ##args);	\
 })
 #else
-#define alloc_workqueue(name, flags, max_active)		\
-	__alloc_workqueue_key((name), (flags), (max_active), NULL, NULL)
+#define alloc_workqueue(fmt, flags, max_active, args...)	\
+	__alloc_workqueue_key((fmt), (flags), (max_active),	\
+			      NULL, NULL, ##args)
 #endif
 
 /**
  * alloc_ordered_workqueue - allocate an ordered workqueue
- * @name: name of the workqueue
+ * @fmt: printf format for the name of the workqueue
  * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful)
+ * @args: args for @fmt
  *
  * Allocate an ordered workqueue.  An ordered workqueue executes at
  * most one work item at any given time in the queued order.  They are
@@ -331,11 +349,8 @@ __alloc_workqueue_key(const char *name, unsigned int flags, int max_active,
  * RETURNS:
  * Pointer to the allocated workqueue on success, %NULL on failure.
  */
-static inline struct workqueue_struct *
-alloc_ordered_workqueue(const char *name, unsigned int flags)
-{
-	return alloc_workqueue(name, WQ_UNBOUND | flags, 1);
-}
+#define alloc_ordered_workqueue(fmt, flags, args...)		\
+	alloc_workqueue(fmt, WQ_UNBOUND | (flags), 1, ##args)
 
 #define create_workqueue(name)					\
 	alloc_workqueue((name), WQ_MEM_RECLAIM, 1)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 42fa9ad0a810..bec7b5b53e03 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -242,10 +242,10 @@ struct workqueue_struct {
 
 	int			nr_drainers;	/* W: drain in progress */
 	int			saved_max_active; /* W: saved cwq max_active */
-	const char		*name;		/* I: workqueue name */
 #ifdef CONFIG_LOCKDEP
 	struct lockdep_map	lockdep_map;
 #endif
+	char			name[];		/* I: workqueue name */
 };
 
 struct workqueue_struct *system_wq __read_mostly;
@@ -2954,14 +2954,29 @@ static int wq_clamp_max_active(int max_active, unsigned int flags,
 	return clamp_val(max_active, 1, lim);
 }
 
-struct workqueue_struct *__alloc_workqueue_key(const char *name,
+struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 					       unsigned int flags,
 					       int max_active,
 					       struct lock_class_key *key,
-					       const char *lock_name)
+					       const char *lock_name, ...)
 {
+	va_list args, args1;
 	struct workqueue_struct *wq;
 	unsigned int cpu;
+	size_t namelen;
+
+	/* determine namelen, allocate wq and format name */
+	va_start(args, lock_name);
+	va_copy(args1, args);
+	namelen = vsnprintf(NULL, 0, fmt, args) + 1;
+
+	wq = kzalloc(sizeof(*wq) + namelen, GFP_KERNEL);
+	if (!wq)
+		goto err;
+
+	vsnprintf(wq->name, namelen, fmt, args1);
+	va_end(args);
+	va_end(args1);
 
 	/*
 	 * Workqueues which may be used during memory reclaim should
@@ -2978,12 +2993,9 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
 		flags |= WQ_HIGHPRI;
 
 	max_active = max_active ?: WQ_DFL_ACTIVE;
-	max_active = wq_clamp_max_active(max_active, flags, name);
-
-	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
-	if (!wq)
-		goto err;
+	max_active = wq_clamp_max_active(max_active, flags, wq->name);
 
+	/* init wq */
 	wq->flags = flags;
 	wq->saved_max_active = max_active;
 	mutex_init(&wq->flush_mutex);
@@ -2991,7 +3003,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
 	INIT_LIST_HEAD(&wq->flusher_queue);
 	INIT_LIST_HEAD(&wq->flusher_overflow);
 
-	wq->name = name;
 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
 	INIT_LIST_HEAD(&wq->list);
 
@@ -3020,7 +3031,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
 		if (!rescuer)
 			goto err;
 
-		rescuer->task = kthread_create(rescuer_thread, wq, "%s", name);
+		rescuer->task = kthread_create(rescuer_thread, wq, "%s",
+					       wq->name);
 		if (IS_ERR(rescuer->task))
 			goto err;
 
-- 
cgit v1.2.3


From 0e8caaceff160ad821c83d798fc03812cb810560 Mon Sep 17 00:00:00 2001
From: Tomasz Stanislawski <t.stanislaws@samsung.com>
Date: Wed, 10 Aug 2011 10:37:47 -0300
Subject: [media] v4l: add support for selection api

This patch introduces a new API for precise control of the cropping and
composing features of video devices. The new ioctls are VIDIOC_S_SELECTION
and VIDIOC_G_SELECTION.

Signed-off-by: Tomasz Stanislawski <t.stanislaws@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-compat-ioctl32.c |  2 ++
 drivers/media/video/v4l2-ioctl.c          | 34 +++++++++++++++++++++++
 include/linux/videodev2.h                 | 46 +++++++++++++++++++++++++++++++
 include/media/v4l2-ioctl.h                |  4 +++
 4 files changed, 86 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index c68531b88279..af4419e6c658 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -985,6 +985,8 @@ long v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 	case VIDIOC_CROPCAP:
 	case VIDIOC_G_CROP:
 	case VIDIOC_S_CROP:
+	case VIDIOC_G_SELECTION:
+	case VIDIOC_S_SELECTION:
 	case VIDIOC_G_JPEGCOMP:
 	case VIDIOC_S_JPEGCOMP:
 	case VIDIOC_QUERYSTD:
diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index 639abeee3392..072cfc1a166f 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -238,6 +238,8 @@ static const char *v4l2_ioctls[] = {
 	[_IOC_NR(VIDIOC_CROPCAP)]          = "VIDIOC_CROPCAP",
 	[_IOC_NR(VIDIOC_G_CROP)]           = "VIDIOC_G_CROP",
 	[_IOC_NR(VIDIOC_S_CROP)]           = "VIDIOC_S_CROP",
+	[_IOC_NR(VIDIOC_G_SELECTION)]      = "VIDIOC_G_SELECTION",
+	[_IOC_NR(VIDIOC_S_SELECTION)]      = "VIDIOC_S_SELECTION",
 	[_IOC_NR(VIDIOC_G_JPEGCOMP)]       = "VIDIOC_G_JPEGCOMP",
 	[_IOC_NR(VIDIOC_S_JPEGCOMP)]       = "VIDIOC_S_JPEGCOMP",
 	[_IOC_NR(VIDIOC_QUERYSTD)]         = "VIDIOC_QUERYSTD",
@@ -1571,6 +1573,38 @@ static long __video_do_ioctl(struct file *file,
 		ret = ops->vidioc_s_crop(file, fh, p);
 		break;
 	}
+	case VIDIOC_G_SELECTION:
+	{
+		struct v4l2_selection *p = arg;
+
+		if (!ops->vidioc_g_selection)
+			break;
+
+		dbgarg(cmd, "type=%s\n", prt_names(p->type, v4l2_type_names));
+
+		ret = ops->vidioc_g_selection(file, fh, p);
+		if (!ret)
+			dbgrect(vfd, "", &p->r);
+		break;
+	}
+	case VIDIOC_S_SELECTION:
+	{
+		struct v4l2_selection *p = arg;
+
+		if (!ops->vidioc_s_selection)
+			break;
+
+		if (ret_prio) {
+			ret = ret_prio;
+			break;
+		}
+
+		dbgarg(cmd, "type=%s\n", prt_names(p->type, v4l2_type_names));
+		dbgrect(vfd, "", &p->r);
+
+		ret = ops->vidioc_s_selection(file, fh, p);
+		break;
+	}
 	case VIDIOC_CROPCAP:
 	{
 		struct v4l2_cropcap *p = arg;
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index b2e1331ca76b..012a29604522 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -743,6 +743,48 @@ struct v4l2_crop {
 	struct v4l2_rect        c;
 };
 
+/* Hints for adjustments of selection rectangle */
+#define V4L2_SEL_FLAG_GE	0x00000001
+#define V4L2_SEL_FLAG_LE	0x00000002
+
+/* Selection targets */
+
+/* current cropping area */
+#define V4L2_SEL_TGT_CROP_ACTIVE	0
+/* default cropping area */
+#define V4L2_SEL_TGT_CROP_DEFAULT	1
+/* cropping bounds */
+#define V4L2_SEL_TGT_CROP_BOUNDS	2
+/* current composing area */
+#define V4L2_SEL_TGT_COMPOSE_ACTIVE	256
+/* default composing area */
+#define V4L2_SEL_TGT_COMPOSE_DEFAULT	257
+/* composing bounds */
+#define V4L2_SEL_TGT_COMPOSE_BOUNDS	258
+/* current composing area plus all padding pixels */
+#define V4L2_SEL_TGT_COMPOSE_PADDED	259
+
+/**
+ * struct v4l2_selection - selection info
+ * @type:	buffer type (do not use *_MPLANE types)
+ * @target:	selection target, used to choose one of possible rectangles
+ * @flags:	constraints flags
+ * @r:		coordinates of selection window
+ * @reserved:	for future use, rounds structure size to 64 bytes, set to zero
+ *
+ * Hardware may use multiple helper windows to process a video stream.
+ * The structure is used to exchange these selection areas between
+ * an application and a driver.
+ */
+struct v4l2_selection {
+	__u32			type;
+	__u32			target;
+	__u32                   flags;
+	struct v4l2_rect        r;
+	__u32                   reserved[9];
+};
+
+
 /*
  *      A N A L O G   V I D E O   S T A N D A R D
  */
@@ -2259,6 +2301,10 @@ struct v4l2_create_buffers {
 #define VIDIOC_CREATE_BUFS	_IOWR('V', 92, struct v4l2_create_buffers)
 #define VIDIOC_PREPARE_BUF	_IOWR('V', 93, struct v4l2_buffer)
 
+/* Experimental selection API */
+#define VIDIOC_G_SELECTION	_IOWR('V', 94, struct v4l2_selection)
+#define VIDIOC_S_SELECTION	_IOWR('V', 95, struct v4l2_selection)
+
 /* Reminder: when adding new ioctls please add support for them to
    drivers/media/video/v4l2-compat-ioctl32.c as well! */
 
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index 4d1c74ad4c84..3f5d60fc5df6 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -196,6 +196,10 @@ struct v4l2_ioctl_ops {
 					struct v4l2_crop *a);
 	int (*vidioc_s_crop)           (struct file *file, void *fh,
 					struct v4l2_crop *a);
+	int (*vidioc_g_selection)      (struct file *file, void *fh,
+					struct v4l2_selection *s);
+	int (*vidioc_s_selection)      (struct file *file, void *fh,
+					struct v4l2_selection *s);
 	/* Compression ioctls */
 	int (*vidioc_g_jpegcomp)       (struct file *file, void *fh,
 					struct v4l2_jpegcompression *a);
-- 
cgit v1.2.3


From e0c2a9aa1e68455dc3439e95d85cabcaff073666 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Mon, 9 Jan 2012 17:18:05 -0500
Subject: GFS2: dlm based recovery coordination

This new method of managing recovery is an alternative to
the previous approach of using the userland gfs_controld.

- use dlm slot numbers to assign journal id's
- use dlm recovery callbacks to initiate journal recovery
- use a dlm lock to determine the first node to mount fs
- use a dlm lock to track journals that need recovery

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c             |   2 +-
 fs/gfs2/glock.h             |   7 +-
 fs/gfs2/incore.h            |  58 ++-
 fs/gfs2/lock_dlm.c          | 993 +++++++++++++++++++++++++++++++++++++++++++-
 fs/gfs2/main.c              |  10 +
 fs/gfs2/ops_fstype.c        |  29 +-
 fs/gfs2/recovery.c          |   4 +
 fs/gfs2/sys.c               |  33 +-
 fs/gfs2/sys.h               |   2 +
 include/linux/gfs2_ondisk.h |   2 +
 10 files changed, 1098 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 88e8a23d0026..376816fcd040 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1353,7 +1353,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
 	spin_lock(&gl->gl_spin);
 	gl->gl_reply = ret;
 
-	if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
+	if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
 		if (gfs2_should_freeze(gl)) {
 			set_bit(GLF_FROZEN, &gl->gl_flags);
 			spin_unlock(&gl->gl_spin);
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 2553b858a72e..307ac31df781 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -121,8 +121,11 @@ enum {
 
 struct lm_lockops {
 	const char *lm_proto_name;
-	int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
- 	void (*lm_unmount) (struct gfs2_sbd *sdp);
+	int (*lm_mount) (struct gfs2_sbd *sdp, const char *table);
+	void (*lm_first_done) (struct gfs2_sbd *sdp);
+	void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid,
+				    unsigned int result);
+	void (*lm_unmount) (struct gfs2_sbd *sdp);
 	void (*lm_withdraw) (struct gfs2_sbd *sdp);
 	void (*lm_put_lock) (struct gfs2_glock *gl);
 	int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index e1d3bb59945c..b9422bc8e2fe 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -139,8 +139,45 @@ struct gfs2_bufdata {
 #define GDLM_STRNAME_BYTES	25
 #define GDLM_LVB_SIZE		32
 
+/*
+ * ls_recover_flags:
+ *
+ * DFL_BLOCK_LOCKS: dlm is in recovery and will grant locks that had been
+ * held by failed nodes whose journals need recovery.  Those locks should
+ * only be used for journal recovery until the journal recovery is done.
+ * This is set by the dlm recover_prep callback and cleared by the
+ * gfs2_control thread when journal recovery is complete.  To avoid
+ * races between recover_prep setting and gfs2_control clearing, recover_spin
+ * is held while changing this bit and reading/writing recover_block
+ * and recover_start.
+ *
+ * DFL_NO_DLM_OPS: dlm lockspace ops/callbacks are not being used.
+ *
+ * DFL_FIRST_MOUNT: this node is the first to mount this fs and is doing
+ * recovery of all journals before allowing other nodes to mount the fs.
+ * This is cleared when FIRST_MOUNT_DONE is set.
+ *
+ * DFL_FIRST_MOUNT_DONE: this node was the first mounter, and has finished
+ * recovery of all journals, and now allows other nodes to mount the fs.
+ *
+ * DFL_MOUNT_DONE: gdlm_mount has completed successfully and cleared
+ * BLOCK_LOCKS for the first time.  The gfs2_control thread should now
+ * control clearing BLOCK_LOCKS for further recoveries.
+ *
+ * DFL_UNMOUNT: gdlm_unmount sets to keep sdp off gfs2_control_wq.
+ *
+ * DFL_DLM_RECOVERY: set while dlm is in recovery, between recover_prep()
+ * and recover_done(), i.e. set while recover_block == recover_start.
+ */
+
 enum {
 	DFL_BLOCK_LOCKS		= 0,
+	DFL_NO_DLM_OPS		= 1,
+	DFL_FIRST_MOUNT		= 2,
+	DFL_FIRST_MOUNT_DONE	= 3,
+	DFL_MOUNT_DONE		= 4,
+	DFL_UNMOUNT		= 5,
+	DFL_DLM_RECOVERY	= 6,
 };
 
 struct lm_lockname {
@@ -499,14 +536,26 @@ struct gfs2_sb_host {
 struct lm_lockstruct {
 	int ls_jid;
 	unsigned int ls_first;
-	unsigned int ls_first_done;
 	unsigned int ls_nodir;
 	const struct lm_lockops *ls_ops;
-	unsigned long ls_flags;
 	dlm_lockspace_t *ls_dlm;
 
-	int ls_recover_jid_done;
-	int ls_recover_jid_status;
+	int ls_recover_jid_done;   /* These two are deprecated, */
+	int ls_recover_jid_status; /* used previously by gfs_controld */
+
+	struct dlm_lksb ls_mounted_lksb; /* mounted_lock */
+	struct dlm_lksb ls_control_lksb; /* control_lock */
+	char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */
+	struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */
+
+	spinlock_t ls_recover_spin; /* protects following fields */
+	unsigned long ls_recover_flags; /* DFL_ */
+	uint32_t ls_recover_mount; /* gen in first recover_done cb */
+	uint32_t ls_recover_start; /* gen in last recover_done cb */
+	uint32_t ls_recover_block; /* copy recover_start in last recover_prep */
+	uint32_t ls_recover_size; /* size of recover_submit, recover_result */
+	uint32_t *ls_recover_submit; /* gen in last recover_slot cb per jid */
+	uint32_t *ls_recover_result; /* result of last jid recovery */
 };
 
 struct gfs2_sbd {
@@ -544,6 +593,7 @@ struct gfs2_sbd {
 	wait_queue_head_t sd_glock_wait;
 	atomic_t sd_glock_disposal;
 	struct completion sd_locking_init;
+	struct delayed_work sd_control_work;
 
 	/* Inode Stuff */
 
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index ce85b62bc0a2..8944d1e32ab5 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2009 Red Hat, Inc.  All rights reserved.
+ * Copyright 2004-2011 Red Hat, Inc.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
@@ -11,12 +11,15 @@
 #include <linux/dlm.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/delay.h>
 #include <linux/gfs2_ondisk.h>
 
 #include "incore.h"
 #include "glock.h"
 #include "util.h"
+#include "sys.h"
 
+extern struct workqueue_struct *gfs2_control_wq;
 
 static void gdlm_ast(void *arg)
 {
@@ -185,34 +188,1002 @@ static void gdlm_cancel(struct gfs2_glock *gl)
 	dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
 }
 
-static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname)
+/*
+ * dlm/gfs2 recovery coordination using dlm_recover callbacks
+ *
+ *  1. dlm_controld sees lockspace members change
+ *  2. dlm_controld blocks dlm-kernel locking activity
+ *  3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep)
+ *  4. dlm_controld starts and finishes its own user level recovery
+ *  5. dlm_controld starts dlm-kernel dlm_recoverd to do kernel recovery
+ *  6. dlm_recoverd notifies gfs2 of failed nodes (recover_slot)
+ *  7. dlm_recoverd does its own lock recovery
+ *  8. dlm_recoverd unblocks dlm-kernel locking activity
+ *  9. dlm_recoverd notifies gfs2 when done (recover_done with new generation)
+ * 10. gfs2_control updates control_lock lvb with new generation and jid bits
+ * 11. gfs2_control enqueues journals for gfs2_recover to recover (maybe none)
+ * 12. gfs2_recover dequeues and recovers journals of failed nodes
+ * 13. gfs2_recover provides recovery results to gfs2_control (recovery_result)
+ * 14. gfs2_control updates control_lock lvb jid bits for recovered journals
+ * 15. gfs2_control unblocks normal locking when all journals are recovered
+ *
+ * - failures during recovery
+ *
+ * recover_prep() may set BLOCK_LOCKS (step 3) again before gfs2_control
+ * clears BLOCK_LOCKS (step 15), e.g. another node fails while still
+ * recovering for a prior failure.  gfs2_control needs a way to detect
+ * this so it can leave BLOCK_LOCKS set in step 15.  This is managed using
+ * the recover_block and recover_start values.
+ *
+ * recover_done() provides a new lockspace generation number each time it
+ * is called (step 9).  This generation number is saved as recover_start.
+ * When recover_prep() is called, it sets BLOCK_LOCKS and sets
+ * recover_block = recover_start.  So, while recover_block is equal to
+ * recover_start, BLOCK_LOCKS should remain set.  (recover_spin must
+ * be held around the BLOCK_LOCKS/recover_block/recover_start logic.)
+ *
+ * - more specific gfs2 steps in sequence above
+ *
+ *  3. recover_prep sets BLOCK_LOCKS and sets recover_block = recover_start
+ *  6. recover_slot records any failed jids (maybe none)
+ *  9. recover_done sets recover_start = new generation number
+ * 10. gfs2_control sets control_lock lvb = new gen + bits for failed jids
+ * 12. gfs2_recover does journal recoveries for failed jids identified above
+ * 14. gfs2_control clears control_lock lvb bits for recovered jids
+ * 15. gfs2_control checks if recover_block == recover_start (step 3 occurred
+ *     again) then do nothing, otherwise if recover_start > recover_block
+ *     then clear BLOCK_LOCKS.
+ *
+ * - parallel recovery steps across all nodes
+ *
+ * All nodes attempt to update the control_lock lvb with the new generation
+ * number and jid bits, but only the first to get the control_lock EX will
+ * do so; others will see that it's already done (lvb already contains new
+ * generation number.)
+ *
+ * . All nodes get the same recover_prep/recover_slot/recover_done callbacks
+ * . All nodes attempt to set control_lock lvb gen + bits for the new gen
+ * . One node gets control_lock first and writes the lvb, others see it's done
+ * . All nodes attempt to recover jids for which they see control_lock bits set
+ * . One node succeeds for a jid, and that one clears the jid bit in the lvb
+ * . All nodes will eventually see all lvb bits clear and unblock locks
+ *
+ * - is there a problem with clearing an lvb bit that should be set
+ *   and missing a journal recovery?
+ *
+ * 1. jid fails
+ * 2. lvb bit set for step 1
+ * 3. jid recovered for step 1
+ * 4. jid taken again (new mount)
+ * 5. jid fails (for step 4)
+ * 6. lvb bit set for step 5 (will already be set)
+ * 7. lvb bit cleared for step 3
+ *
+ * This is not a problem because the failure in step 5 does not
+ * require recovery, because the mount in step 4 could not have
+ * progressed far enough to unblock locks and access the fs.  The
+ * control_mount() function waits for all recoveries to be complete
+ * for the latest lockspace generation before ever unblocking locks
+ * and returning.  The mount in step 4 waits until the recovery in
+ * step 1 is done.
+ *
+ * - special case of first mounter: first node to mount the fs
+ *
+ * The first node to mount a gfs2 fs needs to check all the journals
+ * and recover any that need recovery before other nodes are allowed
+ * to mount the fs.  (Others may begin mounting, but they must wait
+ * for the first mounter to be done before taking locks on the fs
+ * or accessing the fs.)  This has two parts:
+ *
+ * 1. The mounted_lock tells a node it's the first to mount the fs.
+ * Each node holds the mounted_lock in PR while it's mounted.
+ * Each node tries to acquire the mounted_lock in EX when it mounts.
+ * If a node is granted the mounted_lock EX it means there are no
+ * other mounted nodes (no PR locks exist), and it is the first mounter.
+ * The mounted_lock is demoted to PR when first recovery is done, so
+ * others will fail to get an EX lock, but will get a PR lock.
+ *
+ * 2. The control_lock blocks others in control_mount() while the first
+ * mounter is doing first mount recovery of all journals.
+ * A mounting node needs to acquire control_lock in EX mode before
+ * it can proceed.  The first mounter holds control_lock in EX while doing
+ * the first mount recovery, blocking mounts from other nodes, then demotes
+ * control_lock to NL when it's done (others_may_mount/first_done),
+ * allowing other nodes to continue mounting.
+ *
+ * first mounter:
+ * control_lock EX/NOQUEUE success
+ * mounted_lock EX/NOQUEUE success (no other PR, so no other mounters)
+ * set first=1
+ * do first mounter recovery
+ * mounted_lock EX->PR
+ * control_lock EX->NL, write lvb generation
+ *
+ * other mounter:
+ * control_lock EX/NOQUEUE success (if fail -EAGAIN, retry)
+ * mounted_lock EX/NOQUEUE fail -EAGAIN (expected due to other mounters PR)
+ * mounted_lock PR/NOQUEUE success
+ * read lvb generation
+ * control_lock EX->NL
+ * set first=0
+ *
+ * - mount during recovery
+ *
+ * If a node mounts while others are doing recovery (not first mounter),
+ * the mounting node will get its initial recover_done() callback without
+ * having seen any previous failures/callbacks.
+ *
+ * It must wait for all recoveries preceding its mount to be finished
+ * before it unblocks locks.  It does this by repeating the "other mounter"
+ * steps above until the lvb generation number is >= its mount generation
+ * number (from initial recover_done) and all lvb bits are clear.
+ *
+ * - control_lock lvb format
+ *
+ * 4 bytes generation number: the latest dlm lockspace generation number
+ * from recover_done callback.  Indicates the jid bitmap has been updated
+ * to reflect all slot failures through that generation.
+ * 4 bytes unused.
+ * GDLM_LVB_SIZE-8 bytes of jid bit map. If bit N is set, it indicates
+ * that jid N needs recovery.
+ */
+
+#define JID_BITMAP_OFFSET 8 /* 4 byte generation number + 4 byte unused */
+
+static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen,
+			     char *lvb_bits)
+{
+	uint32_t gen;
+	memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE);
+	memcpy(&gen, lvb_bits, sizeof(uint32_t));
+	*lvb_gen = le32_to_cpu(gen);
+}
+
+static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
+			      char *lvb_bits)
+{
+	uint32_t gen;
+	memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE);
+	gen = cpu_to_le32(lvb_gen);
+	memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t));
+}
+
+static int all_jid_bits_clear(char *lvb)
+{
+	int i;
+	for (i = JID_BITMAP_OFFSET; i < GDLM_LVB_SIZE; i++) {
+		if (lvb[i])
+			return 0;
+	}
+	return 1;
+}
+
+static void sync_wait_cb(void *arg)
+{
+	struct lm_lockstruct *ls = arg;
+	complete(&ls->ls_sync_wait);
+}
+
+static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name)
 {
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 	int error;
 
-	if (fsname == NULL) {
-		fs_info(sdp, "no fsname found\n");
-		return -EINVAL;
+	error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
+	if (error) {
+		fs_err(sdp, "%s lkid %x error %d\n",
+		       name, lksb->sb_lkid, error);
+		return error;
+	}
+
+	wait_for_completion(&ls->ls_sync_wait);
+
+	if (lksb->sb_status != -DLM_EUNLOCK) {
+		fs_err(sdp, "%s lkid %x status %d\n",
+		       name, lksb->sb_lkid, lksb->sb_status);
+		return -1;
+	}
+	return 0;
+}
+
+static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags,
+		     unsigned int num, struct dlm_lksb *lksb, char *name)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char strname[GDLM_STRNAME_BYTES];
+	int error, status;
+
+	memset(strname, 0, GDLM_STRNAME_BYTES);
+	snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num);
+
+	error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
+			 strname, GDLM_STRNAME_BYTES - 1,
+			 0, sync_wait_cb, ls, NULL);
+	if (error) {
+		fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n",
+		       name, lksb->sb_lkid, flags, mode, error);
+		return error;
+	}
+
+	wait_for_completion(&ls->ls_sync_wait);
+
+	status = lksb->sb_status;
+
+	if (status && status != -EAGAIN) {
+		fs_err(sdp, "%s lkid %x flags %x mode %d status %d\n",
+		       name, lksb->sb_lkid, flags, mode, status);
+	}
+
+	return status;
+}
+
+static int mounted_unlock(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	return sync_unlock(sdp, &ls->ls_mounted_lksb, "mounted_lock");
+}
+
+static int mounted_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	return sync_lock(sdp, mode, flags, GFS2_MOUNTED_LOCK,
+			 &ls->ls_mounted_lksb, "mounted_lock");
+}
+
+static int control_unlock(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	return sync_unlock(sdp, &ls->ls_control_lksb, "control_lock");
+}
+
+static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	return sync_lock(sdp, mode, flags, GFS2_CONTROL_LOCK,
+			 &ls->ls_control_lksb, "control_lock");
+}
+
+static void gfs2_control_func(struct work_struct *work)
+{
+	struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work);
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char lvb_bits[GDLM_LVB_SIZE];
+	uint32_t block_gen, start_gen, lvb_gen, flags;
+	int recover_set = 0;
+	int write_lvb = 0;
+	int recover_size;
+	int i, error;
+
+	spin_lock(&ls->ls_recover_spin);
+	/*
+	 * No MOUNT_DONE means we're still mounting; control_mount()
+	 * will set this flag, after which this thread will take over
+	 * all further clearing of BLOCK_LOCKS.
+	 *
+	 * FIRST_MOUNT means this node is doing first mounter recovery,
+	 * for which recovery control is handled by
+	 * control_mount()/control_first_done(), not this thread.
+	 */
+	if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
+	     test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+	block_gen = ls->ls_recover_block;
+	start_gen = ls->ls_recover_start;
+	spin_unlock(&ls->ls_recover_spin);
+
+	/*
+	 * Equal block_gen and start_gen implies we are between
+	 * recover_prep and recover_done callbacks, which means
+	 * dlm recovery is in progress and dlm locking is blocked.
+	 * There's no point trying to do any work until recover_done.
+	 */
+
+	if (block_gen == start_gen)
+		return;
+
+	/*
+	 * Propagate recover_submit[] and recover_result[] to lvb:
+	 * dlm_recoverd adds to recover_submit[] jids needing recovery
+	 * gfs2_recover adds to recover_result[] journal recovery results
+	 *
+	 * set lvb bit for jids in recover_submit[] if the lvb has not
+	 * yet been updated for the generation of the failure
+	 *
+	 * clear lvb bit for jids in recover_result[] if the result of
+	 * the journal recovery is SUCCESS
+	 */
+
+	error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
+	if (error) {
+		fs_err(sdp, "control lock EX error %d\n", error);
+		return;
+	}
+
+	control_lvb_read(ls, &lvb_gen, lvb_bits);
+
+	spin_lock(&ls->ls_recover_spin);
+	if (block_gen != ls->ls_recover_block ||
+	    start_gen != ls->ls_recover_start) {
+		fs_info(sdp, "recover generation %u block1 %u %u\n",
+			start_gen, block_gen, ls->ls_recover_block);
+		spin_unlock(&ls->ls_recover_spin);
+		control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
+		return;
+	}
+
+	recover_size = ls->ls_recover_size;
+
+	if (lvb_gen <= start_gen) {
+		/*
+		 * Clear lvb bits for jids we've successfully recovered.
+		 * Because all nodes attempt to recover failed journals,
+		 * a journal can be recovered multiple times successfully
+		 * in succession.  Only the first will really do recovery,
+		 * the others find it clean, but still report a successful
+		 * recovery.  So, another node may have already recovered
+		 * the jid and cleared the lvb bit for it.
+		 */
+		for (i = 0; i < recover_size; i++) {
+			if (ls->ls_recover_result[i] != LM_RD_SUCCESS)
+				continue;
+
+			ls->ls_recover_result[i] = 0;
+
+			if (!test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET))
+				continue;
+
+			__clear_bit_le(i, lvb_bits + JID_BITMAP_OFFSET);
+			write_lvb = 1;
+		}
+	}
+
+	if (lvb_gen == start_gen) {
+		/*
+		 * Failed slots before start_gen are already set in lvb.
+		 */
+		for (i = 0; i < recover_size; i++) {
+			if (!ls->ls_recover_submit[i])
+				continue;
+			if (ls->ls_recover_submit[i] < lvb_gen)
+				ls->ls_recover_submit[i] = 0;
+		}
+	} else if (lvb_gen < start_gen) {
+		/*
+		 * Failed slots before start_gen are not yet set in lvb.
+		 */
+		for (i = 0; i < recover_size; i++) {
+			if (!ls->ls_recover_submit[i])
+				continue;
+			if (ls->ls_recover_submit[i] < start_gen) {
+				ls->ls_recover_submit[i] = 0;
+				__set_bit_le(i, lvb_bits + JID_BITMAP_OFFSET);
+			}
+		}
+		/* even if there are no bits to set, we need to write the
+		   latest generation to the lvb */
+		write_lvb = 1;
+	} else {
+		/*
+		 * we should be getting a recover_done() for lvb_gen soon
+		 */
+	}
+	spin_unlock(&ls->ls_recover_spin);
+
+	if (write_lvb) {
+		control_lvb_write(ls, start_gen, lvb_bits);
+		flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK;
+	} else {
+		flags = DLM_LKF_CONVERT;
+	}
+
+	error = control_lock(sdp, DLM_LOCK_NL, flags);
+	if (error) {
+		fs_err(sdp, "control lock NL error %d\n", error);
+		return;
+	}
+
+	/*
+	 * Everyone will see jid bits set in the lvb, run gfs2_recover_set(),
+	 * and clear a jid bit in the lvb if the recovery is a success.
+	 * Eventually all journals will be recovered, all jid bits will
+	 * be cleared in the lvb, and everyone will clear BLOCK_LOCKS.
+	 */
+
+	for (i = 0; i < recover_size; i++) {
+		if (test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) {
+			fs_info(sdp, "recover generation %u jid %d\n",
+				start_gen, i);
+			gfs2_recover_set(sdp, i);
+			recover_set++;
+		}
+	}
+	if (recover_set)
+		return;
+
+	/*
+	 * No more jid bits set in lvb, all recovery is done, unblock locks
+	 * (unless a new recover_prep callback has occurred blocking locks
+	 * again while working above)
+	 */
+
+	spin_lock(&ls->ls_recover_spin);
+	if (ls->ls_recover_block == block_gen &&
+	    ls->ls_recover_start == start_gen) {
+		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		fs_info(sdp, "recover generation %u done\n", start_gen);
+		gfs2_glock_thaw(sdp);
+	} else {
+		fs_info(sdp, "recover generation %u block2 %u %u\n",
+			start_gen, block_gen, ls->ls_recover_block);
+		spin_unlock(&ls->ls_recover_spin);
+	}
+}
+
+static int control_mount(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char lvb_bits[GDLM_LVB_SIZE];
+	uint32_t start_gen, block_gen, mount_gen, lvb_gen;
+	int mounted_mode;
+	int retries = 0;
+	int error;
+
+	memset(&ls->ls_mounted_lksb, 0, sizeof(struct dlm_lksb));
+	memset(&ls->ls_control_lksb, 0, sizeof(struct dlm_lksb));
+	memset(&ls->ls_control_lvb, 0, GDLM_LVB_SIZE);
+	ls->ls_control_lksb.sb_lvbptr = ls->ls_control_lvb;
+	init_completion(&ls->ls_sync_wait);
+
+	set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+
+	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_VALBLK);
+	if (error) {
+		fs_err(sdp, "control_mount control_lock NL error %d\n", error);
+		return error;
+	}
+
+	error = mounted_lock(sdp, DLM_LOCK_NL, 0);
+	if (error) {
+		fs_err(sdp, "control_mount mounted_lock NL error %d\n", error);
+		control_unlock(sdp);
+		return error;
+	}
+	mounted_mode = DLM_LOCK_NL;
+
+restart:
+	if (retries++ && signal_pending(current)) {
+		error = -EINTR;
+		goto fail;
+	}
+
+	/*
+	 * We always start with both locks in NL. control_lock is
+	 * demoted to NL below so we don't need to do it here.
+	 */
+
+	if (mounted_mode != DLM_LOCK_NL) {
+		error = mounted_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
+		if (error)
+			goto fail;
+		mounted_mode = DLM_LOCK_NL;
+	}
+
+	/*
+	 * Other nodes need to do some work in dlm recovery and gfs2_control
+	 * before the recover_done and control_lock will be ready for us below.
+	 * A delay here is not required but often avoids having to retry.
+	 */
+
+	msleep_interruptible(500);
+
+	/*
+	 * Acquire control_lock in EX and mounted_lock in either EX or PR.
+	 * control_lock lvb keeps track of any pending journal recoveries.
+	 * mounted_lock indicates if any other nodes have the fs mounted.
+	 */
+
+	error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE|DLM_LKF_VALBLK);
+	if (error == -EAGAIN) {
+		goto restart;
+	} else if (error) {
+		fs_err(sdp, "control_mount control_lock EX error %d\n", error);
+		goto fail;
+	}
+
+	error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
+	if (!error) {
+		mounted_mode = DLM_LOCK_EX;
+		goto locks_done;
+	} else if (error != -EAGAIN) {
+		fs_err(sdp, "control_mount mounted_lock EX error %d\n", error);
+		goto fail;
+	}
+
+	error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
+	if (!error) {
+		mounted_mode = DLM_LOCK_PR;
+		goto locks_done;
+	} else {
+		/* not even -EAGAIN should happen here */
+		fs_err(sdp, "control_mount mounted_lock PR error %d\n", error);
+		goto fail;
+	}
+
+locks_done:
+	/*
+	 * If we got both locks above in EX, then we're the first mounter.
+	 * If not, then we need to wait for the control_lock lvb to be
+	 * updated by other mounted nodes to reflect our mount generation.
+	 *
+	 * In simple first mounter cases, first mounter will see zero lvb_gen,
+	 * but in cases where all existing nodes leave/fail before mounting
+	 * nodes finish control_mount, then all nodes will be mounting and
+	 * lvb_gen will be non-zero.
+	 */
+
+	control_lvb_read(ls, &lvb_gen, lvb_bits);
+
+	if (lvb_gen == 0xFFFFFFFF) {
+		/* special value to force mount attempts to fail */
+		fs_err(sdp, "control_mount control_lock disabled\n");
+		error = -EINVAL;
+		goto fail;
+	}
+
+	if (mounted_mode == DLM_LOCK_EX) {
+		/* first mounter, keep both EX while doing first recovery */
+		spin_lock(&ls->ls_recover_spin);
+		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+		set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
+		set_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		fs_info(sdp, "first mounter control generation %u\n", lvb_gen);
+		return 0;
+	}
+
+	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
+	if (error)
+		goto fail;
+
+	/*
+	 * We are not first mounter, now we need to wait for the control_lock
+	 * lvb generation to be >= the generation from our first recover_done
+	 * and all lvb bits to be clear (no pending journal recoveries.)
+	 */
+
+	if (!all_jid_bits_clear(lvb_bits)) {
+		/* journals need recovery, wait until all are clear */
+		fs_info(sdp, "control_mount wait for journal recovery\n");
+		goto restart;
+	}
+
+	spin_lock(&ls->ls_recover_spin);
+	block_gen = ls->ls_recover_block;
+	start_gen = ls->ls_recover_start;
+	mount_gen = ls->ls_recover_mount;
+
+	if (lvb_gen < mount_gen) {
+		/* wait for mounted nodes to update control_lock lvb to our
+		   generation, which might include new recovery bits set */
+		fs_info(sdp, "control_mount wait1 block %u start %u mount %u "
+			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
+			lvb_gen, ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		goto restart;
+	}
+
+	if (lvb_gen != start_gen) {
+		/* wait for mounted nodes to update control_lock lvb to the
+		   latest recovery generation */
+		fs_info(sdp, "control_mount wait2 block %u start %u mount %u "
+			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
+			lvb_gen, ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		goto restart;
+	}
+
+	if (block_gen == start_gen) {
+		/* dlm recovery in progress, wait for it to finish */
+		fs_info(sdp, "control_mount wait3 block %u start %u mount %u "
+			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
+			lvb_gen, ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		goto restart;
 	}
 
-	error = dlm_new_lockspace(fsname, NULL, 
-				  DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
-				  (ls->ls_nodir ? DLM_LSFL_NODIR : 0),
-				  GDLM_LVB_SIZE, NULL, NULL, NULL, &ls->ls_dlm);
+	clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+	set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
+	memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
+	memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
+	spin_unlock(&ls->ls_recover_spin);
+	return 0;
+
+fail:
+	mounted_unlock(sdp);
+	control_unlock(sdp);
+	return error;
+}
+
+static int dlm_recovery_wait(void *word)
+{
+	schedule();
+	return 0;
+}
+
+static int control_first_done(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char lvb_bits[GDLM_LVB_SIZE];
+	uint32_t start_gen, block_gen;
+	int error;
+
+restart:
+	spin_lock(&ls->ls_recover_spin);
+	start_gen = ls->ls_recover_start;
+	block_gen = ls->ls_recover_block;
+
+	if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags) ||
+	    !test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
+	    !test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
+		/* sanity check, should not happen */
+		fs_err(sdp, "control_first_done start %u block %u flags %lx\n",
+		       start_gen, block_gen, ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		control_unlock(sdp);
+		return -1;
+	}
+
+	if (start_gen == block_gen) {
+		/*
+		 * Wait for the end of a dlm recovery cycle to switch from
+		 * first mounter recovery.  We can ignore any recover_slot
+		 * callbacks between the recover_prep and next recover_done
+		 * because we are still the first mounter and any failed nodes
+		 * have not fully mounted, so they don't need recovery.
+		 */
+		spin_unlock(&ls->ls_recover_spin);
+		fs_info(sdp, "control_first_done wait gen %u\n", start_gen);
+
+		wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
+			    dlm_recovery_wait, TASK_UNINTERRUPTIBLE);
+		goto restart;
+	}
+
+	clear_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
+	set_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags);
+	memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
+	memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
+	spin_unlock(&ls->ls_recover_spin);
+
+	memset(lvb_bits, 0, sizeof(lvb_bits));
+	control_lvb_write(ls, start_gen, lvb_bits);
+
+	error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT);
+	if (error)
+		fs_err(sdp, "control_first_done mounted PR error %d\n", error);
+
+	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
 	if (error)
-		printk(KERN_ERR "dlm_new_lockspace error %d", error);
+		fs_err(sdp, "control_first_done control NL error %d\n", error);
 
 	return error;
 }
 
+/*
+ * Expand the jid arrays (in multiples of RECOVER_SIZE_INC) to accommodate
+ * the largest slot number; dlm slots start at 1, gfs2 jids at 0, so jid =
+ * slot - 1.  Returns 0 on success or no-op, -ENOMEM on allocation failure.
+ */
+
+#define RECOVER_SIZE_INC 16
+
+static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
+			    int num_slots)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	uint32_t *submit = NULL;
+	uint32_t *result = NULL;
+	uint32_t old_size, new_size;
+	int i, max_jid;
+
+	max_jid = 0;
+	for (i = 0; i < num_slots; i++) {
+		if (max_jid < slots[i].slot - 1)
+			max_jid = slots[i].slot - 1;
+	}
+
+	old_size = ls->ls_recover_size;
+
+	if (old_size >= max_jid + 1)
+		return 0;
+	/* one increment may not be enough: round up until max_jid fits */
+	new_size = roundup(max_jid + 1, RECOVER_SIZE_INC);
+
+	submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
+	result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
+	if (!submit || !result) {
+		kfree(submit);
+		kfree(result);
+		return -ENOMEM;
+	}
+
+	spin_lock(&ls->ls_recover_spin);
+	memcpy(submit, ls->ls_recover_submit, old_size * sizeof(uint32_t));
+	memcpy(result, ls->ls_recover_result, old_size * sizeof(uint32_t));
+	kfree(ls->ls_recover_submit);
+	kfree(ls->ls_recover_result);
+	ls->ls_recover_submit = submit;
+	ls->ls_recover_result = result;
+	ls->ls_recover_size = new_size;
+	spin_unlock(&ls->ls_recover_spin);
+	return 0;
+}
+
+static void free_recover_size(struct lm_lockstruct *ls)
+{
+	kfree(ls->ls_recover_submit);
+	kfree(ls->ls_recover_result);
+	ls->ls_recover_submit = NULL;
+	ls->ls_recover_result = NULL;
+	ls->ls_recover_size = 0;
+}
+
+/* dlm calls before it does lock recovery */
+
+static void gdlm_recover_prep(void *arg)
+{
+	struct gfs2_sbd *sdp = arg;
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
+	spin_lock(&ls->ls_recover_spin);
+	ls->ls_recover_block = ls->ls_recover_start;
+	set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
+
+	if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
+	     test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+	set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+	spin_unlock(&ls->ls_recover_spin);
+}
+
+/* dlm calls after recover_prep has been completed on all lockspace members;
+   records ls_recover_block against the failed member's jid (slot - 1) */
+
+static void gdlm_recover_slot(void *arg, struct dlm_slot *slot)
+{
+	struct gfs2_sbd *sdp = arg;
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	int jid = slot->slot - 1;
+
+	spin_lock(&ls->ls_recover_spin);
+	if (ls->ls_recover_size < jid + 1) {
+		fs_err(sdp, "recover_slot jid %d gen %u short size %d\n",
+		       jid, ls->ls_recover_block, ls->ls_recover_size);
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+
+	if (ls->ls_recover_submit[jid]) {
+		fs_info(sdp, "recover_slot jid %d gen %u prev %u\n",
+			jid, ls->ls_recover_block, ls->ls_recover_submit[jid]);
+	}
+	ls->ls_recover_submit[jid] = ls->ls_recover_block;
+	spin_unlock(&ls->ls_recover_spin);
+}
+
+/* dlm calls after recover_slot and after it completes lock recovery */
+
+static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
+			      int our_slot, uint32_t generation)
+{
+	struct gfs2_sbd *sdp = arg;
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
+	/* ensure the ls jid arrays are large enough */
+	set_recover_size(sdp, slots, num_slots);
+
+	spin_lock(&ls->ls_recover_spin);
+	ls->ls_recover_start = generation;
+
+	if (!ls->ls_recover_mount) {
+		ls->ls_recover_mount = generation;
+		ls->ls_jid = our_slot - 1;
+	}
+
+	if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
+		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);
+
+	clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY);
+	spin_unlock(&ls->ls_recover_spin);
+}
+
+/* gfs2_recover thread has a journal recovery result */
+
+static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
+				 unsigned int result)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
+	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
+		return;
+
+	/* don't care about the recovery of own journal during mount */
+	if (jid == ls->ls_jid)
+		return;
+
+	spin_lock(&ls->ls_recover_spin);
+	if (test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+	if (ls->ls_recover_size < jid + 1) {
+		fs_err(sdp, "recovery_result jid %d short size %d\n",
+		       jid, ls->ls_recover_size);
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+
+	fs_info(sdp, "recover jid %d result %s\n", jid,
+		result == LM_RD_GAVEUP ? "busy" : "success");
+
+	ls->ls_recover_result[jid] = result;
+
+	/* GAVEUP means another node is recovering the journal; delay our
+	   next attempt to recover it, to give the other node a chance to
+	   finish before trying again */
+
+	if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
+		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work,
+				   result == LM_RD_GAVEUP ? HZ : 0);
+	spin_unlock(&ls->ls_recover_spin);
+}
+
+const struct dlm_lockspace_ops gdlm_lockspace_ops = {
+	.recover_prep = gdlm_recover_prep,
+	.recover_slot = gdlm_recover_slot,
+	.recover_done = gdlm_recover_done,
+};
+
+static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char cluster[GFS2_LOCKNAME_LEN];
+	const char *fsname;
+	uint32_t flags;
+	int error, ops_result;
+
+	/*
+	 * initialize everything
+	 */
+
+	INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func);
+	spin_lock_init(&ls->ls_recover_spin);
+	ls->ls_recover_flags = 0;
+	ls->ls_recover_mount = 0;
+	ls->ls_recover_start = 0;
+	ls->ls_recover_block = 0;
+	ls->ls_recover_size = 0;
+	ls->ls_recover_submit = NULL;
+	ls->ls_recover_result = NULL;
+
+	error = set_recover_size(sdp, NULL, 0);
+	if (error)
+		goto fail;
+
+	/*
+	 * prepare dlm_new_lockspace args
+	 */
+
+	fsname = strchr(table, ':');
+	if (!fsname) {
+		fs_info(sdp, "no fsname found\n");
+		error = -EINVAL;
+		goto fail_free;
+	}
+	memset(cluster, 0, sizeof(cluster));
+	memcpy(cluster, table, strlen(table) - strlen(fsname));
+	fsname++;
+
+	flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL;
+	if (ls->ls_nodir)
+		flags |= DLM_LSFL_NODIR;
+
+	/*
+	 * create/join lockspace
+	 */
+
+	error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE,
+				  &gdlm_lockspace_ops, sdp, &ops_result,
+				  &ls->ls_dlm);
+	if (error) {
+		fs_err(sdp, "dlm_new_lockspace error %d\n", error);
+		goto fail_free;
+	}
+
+	if (ops_result < 0) {
+		/*
+		 * dlm does not support ops callbacks,
+		 * old dlm_controld/gfs_controld are used, try without ops.
+		 */
+		fs_info(sdp, "dlm lockspace ops not used\n");
+		free_recover_size(ls);
+		set_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags);
+		return 0;
+	}
+
+	if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) {
+		fs_err(sdp, "dlm lockspace ops disallow jid preset\n");
+		error = -EINVAL;
+		goto fail_release;
+	}
+
+	/*
+	 * control_mount() uses control_lock to determine first mounter,
+	 * and for later mounts, waits for any recoveries to be cleared.
+	 */
+
+	error = control_mount(sdp);
+	if (error) {
+		fs_err(sdp, "mount control error %d\n", error);
+		goto fail_release;
+	}
+
+	ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
+	clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
+	return 0;
+
+fail_release:
+	dlm_release_lockspace(ls->ls_dlm, 2);
+fail_free:
+	free_recover_size(ls);
+fail:
+	return error;
+}
+
+static void gdlm_first_done(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	int error;
+
+	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
+		return;
+
+	error = control_first_done(sdp);
+	if (error)
+		fs_err(sdp, "mount first_done error %d\n", error);
+}
+
 static void gdlm_unmount(struct gfs2_sbd *sdp)
 {
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 
+	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
+		goto release;
+
+	/* wait for gfs2_control_wq to be done with this mount */
+
+	spin_lock(&ls->ls_recover_spin);
+	set_bit(DFL_UNMOUNT, &ls->ls_recover_flags);
+	spin_unlock(&ls->ls_recover_spin);
+	flush_delayed_work_sync(&sdp->sd_control_work);
+
+	/* mounted_lock and control_lock will be purged in dlm recovery */
+release:
 	if (ls->ls_dlm) {
 		dlm_release_lockspace(ls->ls_dlm, 2);
 		ls->ls_dlm = NULL;
 	}
+
+	free_recover_size(ls);
 }
 
 static const match_table_t dlm_tokens = {
@@ -226,6 +1197,8 @@ static const match_table_t dlm_tokens = {
 const struct lm_lockops gfs2_dlm_ops = {
 	.lm_proto_name = "lock_dlm",
 	.lm_mount = gdlm_mount,
+	.lm_first_done = gdlm_first_done,
+	.lm_recovery_result = gdlm_recovery_result,
 	.lm_unmount = gdlm_unmount,
 	.lm_put_lock = gdlm_put_lock,
 	.lm_lock = gdlm_lock,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index c150298e2d8e..a8d9bcd0e19c 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -28,6 +28,8 @@
 #include "recovery.h"
 #include "dir.h"
 
+struct workqueue_struct *gfs2_control_wq;
+
 static struct shrinker qd_shrinker = {
 	.shrink = gfs2_shrink_qd_memory,
 	.seeks = DEFAULT_SEEKS,
@@ -146,12 +148,19 @@ static int __init init_gfs2_fs(void)
 	if (!gfs_recovery_wq)
 		goto fail_wq;
 
+	gfs2_control_wq = alloc_workqueue("gfs2_control",
+			       WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0);
+	if (!gfs2_control_wq)
+		goto fail_control;
+
 	gfs2_register_debugfs();
 
 	printk("GFS2 installed\n");
 
 	return 0;
 
+fail_control:
+	destroy_workqueue(gfs_recovery_wq);
 fail_wq:
 	unregister_filesystem(&gfs2meta_fs_type);
 fail_unregister:
@@ -195,6 +204,7 @@ static void __exit exit_gfs2_fs(void)
 	unregister_filesystem(&gfs2_fs_type);
 	unregister_filesystem(&gfs2meta_fs_type);
 	destroy_workqueue(gfs_recovery_wq);
+	destroy_workqueue(gfs2_control_wq);
 
 	rcu_barrier();
 
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index fe72e79e6ff9..b01573b7ad96 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -562,8 +562,12 @@ static void gfs2_others_may_mount(struct gfs2_sbd *sdp)
 {
 	char *message = "FIRSTMOUNT=Done";
 	char *envp[] = { message, NULL };
-	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-	ls->ls_first_done = 1;
+
+	fs_info(sdp, "first mount done, others may mount\n");
+
+	if (sdp->sd_lockstruct.ls_ops->lm_first_done)
+		sdp->sd_lockstruct.ls_ops->lm_first_done(sdp);
+
 	kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
 }
 
@@ -944,7 +948,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
 	struct gfs2_args *args = &sdp->sd_args;
 	const char *proto = sdp->sd_proto_name;
 	const char *table = sdp->sd_table_name;
-	const char *fsname;
 	char *o, *options;
 	int ret;
 
@@ -1004,21 +1007,12 @@ hostdata_error:
 		}
 	}
 
-	if (sdp->sd_args.ar_spectator)
-		snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
-	else
-		snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
-			 sdp->sd_lockstruct.ls_jid);
-
-	fsname = strchr(table, ':');
-	if (fsname)
-		fsname++;
 	if (lm->lm_mount == NULL) {
 		fs_info(sdp, "Now mounting FS...\n");
 		complete_all(&sdp->sd_locking_init);
 		return 0;
 	}
-	ret = lm->lm_mount(sdp, fsname);
+	ret = lm->lm_mount(sdp, table);
 	if (ret == 0)
 		fs_info(sdp, "Joined cluster. Now mounting FS...\n");
 	complete_all(&sdp->sd_locking_init);
@@ -1124,6 +1118,8 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
 	if (error)
 		goto fail;
 
+	snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);
+
 	gfs2_create_debugfs_file(sdp);
 
 	error = gfs2_sys_fs_add(sdp);
@@ -1160,6 +1156,13 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
 		goto fail_sb;
 	}
 
+	if (sdp->sd_args.ar_spectator)
+		snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s",
+			 sdp->sd_table_name);
+	else
+		snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u",
+			 sdp->sd_table_name, sdp->sd_lockstruct.ls_jid);
+
 	error = init_inodes(sdp, DO);
 	if (error)
 		goto fail_sb;
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index f2a02edcac8f..af49e8f432fe 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -436,12 +436,16 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
 	char env_status[20];
 	char *envp[] = { env_jid, env_status, NULL };
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
         ls->ls_recover_jid_done = jid;
         ls->ls_recover_jid_status = message;
 	sprintf(env_jid, "JID=%d", jid);
 	sprintf(env_status, "RECOVERY=%s",
 		message == LM_RD_SUCCESS ? "Done" : "Failed");
         kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
+
+	if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
+		sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
 }
 
 void gfs2_recover_func(struct work_struct *work)
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 443cabcfcd23..d33172c291ba 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -298,7 +298,7 @@ static ssize_t block_show(struct gfs2_sbd *sdp, char *buf)
 	ssize_t ret;
 	int val = 0;
 
-	if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))
+	if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))
 		val = 1;
 	ret = sprintf(buf, "%d\n", val);
 	return ret;
@@ -313,9 +313,9 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 	val = simple_strtol(buf, NULL, 0);
 
 	if (val == 1)
-		set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
+		set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
 	else if (val == 0) {
-		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
+		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
 		smp_mb__after_clear_bit();
 		gfs2_glock_thaw(sdp);
 	} else {
@@ -350,8 +350,8 @@ static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 		goto out;
 	if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
 		goto out;
-        sdp->sd_lockstruct.ls_first = first;
-        rv = 0;
+	sdp->sd_lockstruct.ls_first = first;
+	rv = 0;
 out:
         spin_unlock(&sdp->sd_jindex_spin);
         return rv ? rv : len;
@@ -360,19 +360,14 @@ out:
 static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
 {
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-	return sprintf(buf, "%d\n", ls->ls_first_done);
+	return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags));
 }
 
-static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid)
 {
-	unsigned jid;
 	struct gfs2_jdesc *jd;
 	int rv;
 
-	rv = sscanf(buf, "%u", &jid);
-	if (rv != 1)
-		return -EINVAL;
-
 	rv = -ESHUTDOWN;
 	spin_lock(&sdp->sd_jindex_spin);
 	if (test_bit(SDF_NORECOVERY, &sdp->sd_flags))
@@ -389,6 +384,20 @@ static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 	}
 out:
 	spin_unlock(&sdp->sd_jindex_spin);
+	return rv;
+}
+
+static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+{
+	unsigned jid;
+	int rv;
+
+	rv = sscanf(buf, "%u", &jid);
+	if (rv != 1)
+		return -EINVAL;
+
+	rv = gfs2_recover_set(sdp, jid);
+
 	return rv ? rv : len;
 }
 
diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h
index e94560e836d7..79182d6ad6ac 100644
--- a/fs/gfs2/sys.h
+++ b/fs/gfs2/sys.h
@@ -19,5 +19,7 @@ void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
 int gfs2_sys_init(void);
 void gfs2_sys_uninit(void);
 
+int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid);
+
 #endif /* __SYS_DOT_H__ */
 
diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h
index 4f4462974c14..b148087f49a6 100644
--- a/include/linux/gfs2_ondisk.h
+++ b/include/linux/gfs2_ondisk.h
@@ -22,6 +22,8 @@
 #define GFS2_LIVE_LOCK		1
 #define GFS2_TRANS_LOCK		2
 #define GFS2_RENAME_LOCK	3
+#define GFS2_CONTROL_LOCK	4
+#define GFS2_MOUNTED_LOCK	5
 
 /* Format numbers for various metadata types */
 
-- 
cgit v1.2.3


From b1bd055d397e09f99dcef9b138ed104ff1812fcb Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Wed, 11 Jan 2012 16:27:11 +0100
Subject: block: Introduce blk_set_stacking_limits function

Stacking driver queue limits are typically bounded exclusively by the
capabilities of the low level devices, not by the stacking driver
itself.

This patch introduces blk_set_stacking_limits() which has more liberal
metrics than the default queue limits function. This allows us to
inherit topology parameters from bottom devices without manually
tweaking the default limits in each driver prior to calling the stacking
function.

Since there is now a clear distinction between stacking and low-level
devices, blk_set_default_limits() has been modified to carry the more
conservative values that we used to manually set in
blk_queue_make_request().

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-settings.c   | 32 ++++++++++++++++++++++++--------
 drivers/md/dm-table.c  |  6 +++---
 drivers/md/md.c        |  1 +
 include/linux/blkdev.h |  1 +
 4 files changed, 29 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index fa1eb0449a05..d3234fc494ad 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -104,9 +104,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
  * @lim:  the queue_limits structure to reset
  *
  * Description:
- *   Returns a queue_limit struct to its default state.  Can be used by
- *   stacking drivers like DM that stage table swaps and reuse an
- *   existing device queue.
+ *   Returns a queue_limit struct to its default state.
  */
 void blk_set_default_limits(struct queue_limits *lim)
 {
@@ -114,13 +112,12 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->max_integrity_segments = 0;
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
 	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
-	lim->max_sectors = BLK_DEF_MAX_SECTORS;
-	lim->max_hw_sectors = INT_MAX;
+	lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
 	lim->max_discard_sectors = 0;
 	lim->discard_granularity = 0;
 	lim->discard_alignment = 0;
 	lim->discard_misaligned = 0;
-	lim->discard_zeroes_data = 1;
+	lim->discard_zeroes_data = 0;
 	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
 	lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
 	lim->alignment_offset = 0;
@@ -130,6 +127,27 @@ void blk_set_default_limits(struct queue_limits *lim)
 }
 EXPORT_SYMBOL(blk_set_default_limits);
 
+/**
+ * blk_set_stacking_limits - set default limits for stacking devices
+ * @lim:  the queue_limits structure to reset
+ *
+ * Description:
+ *   Returns a queue_limit struct to its default state. Should be used
+ *   by stacking drivers like DM that have no internal limits.
+ */
+void blk_set_stacking_limits(struct queue_limits *lim)
+{
+	blk_set_default_limits(lim);
+
+	/* Inherit limits from component devices */
+	lim->discard_zeroes_data = 1;
+	lim->max_segments = USHRT_MAX;
+	lim->max_hw_sectors = UINT_MAX;
+
+	lim->max_sectors = BLK_DEF_MAX_SECTORS;
+}
+EXPORT_SYMBOL(blk_set_stacking_limits);
+
 /**
  * blk_queue_make_request - define an alternate make_request function for a device
  * @q:  the request queue for the device to be affected
@@ -165,8 +183,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	q->nr_batching = BLK_BATCH_REQ;
 
 	blk_set_default_limits(&q->limits);
-	blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
-	q->limits.discard_zeroes_data = 0;
 
 	/*
 	 * by default assume old behaviour and bounce for any highmem page
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 8e9132130142..63cc54289aff 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -699,7 +699,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table,
 	while (i < dm_table_get_num_targets(table)) {
 		ti = dm_table_get_target(table, i++);
 
-		blk_set_default_limits(&ti_limits);
+		blk_set_stacking_limits(&ti_limits);
 
 		/* combine all target devices' limits */
 		if (ti->type->iterate_devices)
@@ -1221,10 +1221,10 @@ int dm_calculate_queue_limits(struct dm_table *table,
 	struct queue_limits ti_limits;
 	unsigned i = 0;
 
-	blk_set_default_limits(limits);
+	blk_set_stacking_limits(limits);
 
 	while (i < dm_table_get_num_targets(table)) {
-		blk_set_default_limits(&ti_limits);
+		blk_set_stacking_limits(&ti_limits);
 
 		ti = dm_table_get_target(table, i++);
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index ee981737edfc..114ba155af87 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4622,6 +4622,7 @@ static int md_alloc(dev_t dev, char *name)
 	mddev->queue->queuedata = mddev;
 
 	blk_queue_make_request(mddev->queue, md_make_request);
+	blk_set_stacking_limits(&mddev->queue->limits);
 
 	disk = alloc_disk(1 << shift);
 	if (!disk) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8bca04873f53..adc34133a56a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -844,6 +844,7 @@ extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
 extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
 extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
 extern void blk_set_default_limits(struct queue_limits *lim);
+extern void blk_set_stacking_limits(struct queue_limits *lim);
 extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 			    sector_t offset);
 extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev,
-- 
cgit v1.2.3


From ef00f59c95fe6e002e7c6e3663cdea65e253f4cc Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Wed, 11 Jan 2012 16:29:31 +0100
Subject: block: Add BLKROTATIONAL ioctl

Introduce an ioctl which permits applications to query whether a block
device is rotational.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/compat_ioctl.c | 3 +++
 block/ioctl.c        | 2 ++
 include/linux/fs.h   | 1 +
 3 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 7b725020823c..7c668c8a6f95 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -719,6 +719,9 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	case BLKSECTGET:
 		return compat_put_ushort(arg,
 					 queue_max_sectors(bdev_get_queue(bdev)));
+	case BLKROTATIONAL:
+		return compat_put_ushort(arg,
+					 !blk_queue_nonrot(bdev_get_queue(bdev)));
 	case BLKRASET: /* compatible, but no compat_ptr (!) */
 	case BLKFRASET:
 		if (!capable(CAP_SYS_ADMIN))
diff --git a/block/ioctl.c b/block/ioctl.c
index ca939fc1030f..337d207ab14d 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -278,6 +278,8 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 		return put_uint(arg, bdev_discard_zeroes_data(bdev));
 	case BLKSECTGET:
 		return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
+	case BLKROTATIONAL:
+		return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev)));
 	case BLKRASET:
 	case BLKFRASET:
 		if(!capable(CAP_SYS_ADMIN))
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e0bc4ffb8e7f..95dd911506f1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -319,6 +319,7 @@ struct inodes_stat_t {
 #define BLKPBSZGET _IO(0x12,123)
 #define BLKDISCARDZEROES _IO(0x12,124)
 #define BLKSECDISCARD _IO(0x12,125)
+#define BLKROTATIONAL _IO(0x12,126)
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
-- 
cgit v1.2.3


From c8991362a0d3cf317dfbfb6cb946607870654e6d Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 10 Jan 2012 22:36:35 +0000
Subject: inet_diag: Rename inet_diag_req into inet_diag_req_v2

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 12 ++++++------
 net/dccp/diag.c           |  4 ++--
 net/ipv4/inet_diag.c      | 34 +++++++++++++++++-----------------
 net/ipv4/tcp_diag.c       |  4 ++--
 net/ipv4/udp_diag.c       | 14 +++++++-------
 5 files changed, 34 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 34e8d52c1925..a5b7e910eea9 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -34,7 +34,7 @@ struct inet_diag_req_compat {
 	__u32	idiag_dbs;		/* Tables to dump (NI) */
 };
 
-struct inet_diag_req {
+struct inet_diag_req_v2 {
 	__u8	sdiag_family;
 	__u8	sdiag_protocol;
 	__u8	idiag_ext;
@@ -143,12 +143,12 @@ struct netlink_callback;
 struct inet_diag_handler {
 	void			(*dump)(struct sk_buff *skb,
 					struct netlink_callback *cb,
-					struct inet_diag_req *r,
+					struct inet_diag_req_v2 *r,
 					struct nlattr *bc);
 
 	int			(*dump_one)(struct sk_buff *in_skb,
 					const struct nlmsghdr *nlh,
-					struct inet_diag_req *req);
+					struct inet_diag_req_v2 *req);
 
 	void			(*idiag_get_info)(struct sock *sk,
 						  struct inet_diag_msg *r,
@@ -158,15 +158,15 @@ struct inet_diag_handler {
 
 struct inet_connection_sock;
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
-			      struct sk_buff *skb, struct inet_diag_req *req,
+			      struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			      u32 pid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh);
 void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
-		struct netlink_callback *cb, struct inet_diag_req *r,
+		struct netlink_callback *cb, struct inet_diag_req_v2 *r,
 		struct nlattr *bc);
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 		struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req);
+		struct inet_diag_req_v2 *req);
 
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 8f1625753377..028fc43aacbd 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -49,13 +49,13 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 }
 
 static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc);
 }
 
 static int dccp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req)
+		struct inet_diag_req_v2 *req)
 {
 	return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req);
 }
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 2240a8e8c44d..cf23a7cacdd4 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -71,7 +71,7 @@ static inline void inet_diag_unlock_handler(
 }
 
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
-			      struct sk_buff *skb, struct inet_diag_req *req,
+			      struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			      u32 pid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh)
 {
@@ -193,7 +193,7 @@ nlmsg_failure:
 EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
 
 static int inet_csk_diag_fill(struct sock *sk,
-			      struct sk_buff *skb, struct inet_diag_req *req,
+			      struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			      u32 pid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh)
 {
@@ -202,7 +202,7 @@ static int inet_csk_diag_fill(struct sock *sk,
 }
 
 static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
-			       struct sk_buff *skb, struct inet_diag_req *req,
+			       struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			       u32 pid, u32 seq, u16 nlmsg_flags,
 			       const struct nlmsghdr *unlh)
 {
@@ -253,7 +253,7 @@ nlmsg_failure:
 }
 
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
-			struct inet_diag_req *r, u32 pid, u32 seq, u16 nlmsg_flags,
+			struct inet_diag_req_v2 *r, u32 pid, u32 seq, u16 nlmsg_flags,
 			const struct nlmsghdr *unlh)
 {
 	if (sk->sk_state == TCP_TIME_WAIT)
@@ -264,7 +264,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 }
 
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
-		const struct nlmsghdr *nlh, struct inet_diag_req *req)
+		const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req)
 {
 	int err;
 	struct sock *sk;
@@ -333,7 +333,7 @@ EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
 
 static int inet_diag_get_exact(struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh,
-			       struct inet_diag_req *req)
+			       struct inet_diag_req_v2 *req)
 {
 	const struct inet_diag_handler *handler;
 	int err;
@@ -540,7 +540,7 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
 static int inet_csk_diag_dump(struct sock *sk,
 			      struct sk_buff *skb,
 			      struct netlink_callback *cb,
-			      struct inet_diag_req *r,
+			      struct inet_diag_req_v2 *r,
 			      const struct nlattr *bc)
 {
 	if (!inet_diag_bc_sk(bc, sk))
@@ -554,7 +554,7 @@ static int inet_csk_diag_dump(struct sock *sk,
 static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
 			       struct sk_buff *skb,
 			       struct netlink_callback *cb,
-			       struct inet_diag_req *r,
+			       struct inet_diag_req_v2 *r,
 			       const struct nlattr *bc)
 {
 	if (bc != NULL) {
@@ -639,7 +639,7 @@ nlmsg_failure:
 
 static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 			       struct netlink_callback *cb,
-			       struct inet_diag_req *r,
+			       struct inet_diag_req_v2 *r,
 			       const struct nlattr *bc)
 {
 	struct inet_diag_entry entry;
@@ -721,7 +721,7 @@ out:
 }
 
 void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
-		struct netlink_callback *cb, struct inet_diag_req *r, struct nlattr *bc)
+		struct netlink_callback *cb, struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	int i, num;
 	int s_i, s_num;
@@ -872,7 +872,7 @@ out:
 EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
 
 static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	const struct inet_diag_handler *handler;
 
@@ -887,12 +887,12 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct nlattr *bc = NULL;
-	int hdrlen = sizeof(struct inet_diag_req);
+	int hdrlen = sizeof(struct inet_diag_req_v2);
 
 	if (nlmsg_attrlen(cb->nlh, hdrlen))
 		bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
 
-	return __inet_diag_dump(skb, cb, (struct inet_diag_req *)NLMSG_DATA(cb->nlh), bc);
+	return __inet_diag_dump(skb, cb, (struct inet_diag_req_v2 *)NLMSG_DATA(cb->nlh), bc);
 }
 
 static inline int inet_diag_type2proto(int type)
@@ -910,7 +910,7 @@ static inline int inet_diag_type2proto(int type)
 static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct inet_diag_req_compat *rc = NLMSG_DATA(cb->nlh);
-	struct inet_diag_req req;
+	struct inet_diag_req_v2 req;
 	struct nlattr *bc = NULL;
 	int hdrlen = sizeof(struct inet_diag_req_compat);
 
@@ -930,7 +930,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh)
 {
 	struct inet_diag_req_compat *rc = NLMSG_DATA(nlh);
-	struct inet_diag_req req;
+	struct inet_diag_req_v2 req;
 
 	req.sdiag_family = rc->idiag_family;
 	req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
@@ -970,7 +970,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 {
-	int hdrlen = sizeof(struct inet_diag_req);
+	int hdrlen = sizeof(struct inet_diag_req_v2);
 
 	if (nlmsg_len(h) < hdrlen)
 		return -EINVAL;
@@ -990,7 +990,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 					  inet_diag_dump, NULL, 0);
 	}
 
-	return inet_diag_get_exact(skb, h, (struct inet_diag_req *)NLMSG_DATA(h));
+	return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h));
 }
 
 static struct sock_diag_handler inet_diag_handler = {
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 8cd357a8be79..ed3f2ad42e0f 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -35,13 +35,13 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 }
 
 static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc);
 }
 
 static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req)
+		struct inet_diag_req_v2 *req)
 {
 	return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
 }
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 69f8a7ca63dd..e5e18cb8a586 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -19,7 +19,7 @@
 #include <linux/sock_diag.h>
 
 static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
-		struct netlink_callback *cb, struct inet_diag_req *req,
+		struct netlink_callback *cb, struct inet_diag_req_v2 *req,
 		struct nlattr *bc)
 {
 	if (!inet_diag_bc_sk(bc, sk))
@@ -30,7 +30,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
 }
 
 static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
-		const struct nlmsghdr *nlh, struct inet_diag_req *req)
+		const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req)
 {
 	int err = -EINVAL;
 	struct sock *sk;
@@ -88,7 +88,7 @@ out_nosk:
 }
 
 static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	int num, s_num, slot, s_slot;
 
@@ -136,13 +136,13 @@ done:
 }
 
 static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	udp_dump(&udp_table, skb, cb, r, bc);
 }
 
 static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req)
+		struct inet_diag_req_v2 *req)
 {
 	return udp_dump_one(&udp_table, in_skb, nlh, req);
 }
@@ -154,13 +154,13 @@ static const struct inet_diag_handler udp_diag_handler = {
 };
 
 static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	udp_dump(&udplite_table, skb, cb, r, bc);
 }
 
 static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req)
+		struct inet_diag_req_v2 *req)
 {
 	return udp_dump_one(&udplite_table, in_skb, nlh, req);
 }
-- 
cgit v1.2.3


From 3b09c84cb622ffbcdb5d541986b1eaf7d5812602 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 10 Jan 2012 22:37:26 +0000
Subject: inet_diag: Rename inet_diag_req_compat into inet_diag_req

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 2 +-
 net/ipv4/inet_diag.c      | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index a5b7e910eea9..f1362b5447fc 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -22,7 +22,7 @@ struct inet_diag_sockid {
 
 /* Request structure */
 
-struct inet_diag_req_compat {
+struct inet_diag_req {
 	__u8	idiag_family;		/* Family of addresses. */
 	__u8	idiag_src_len;
 	__u8	idiag_dst_len;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index cf23a7cacdd4..fcf281819cd4 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -909,10 +909,10 @@ static inline int inet_diag_type2proto(int type)
 
 static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct inet_diag_req_compat *rc = NLMSG_DATA(cb->nlh);
+	struct inet_diag_req *rc = NLMSG_DATA(cb->nlh);
 	struct inet_diag_req_v2 req;
 	struct nlattr *bc = NULL;
-	int hdrlen = sizeof(struct inet_diag_req_compat);
+	int hdrlen = sizeof(struct inet_diag_req);
 
 	req.sdiag_family = AF_UNSPEC; /* compatibility */
 	req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
@@ -929,7 +929,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *c
 static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh)
 {
-	struct inet_diag_req_compat *rc = NLMSG_DATA(nlh);
+	struct inet_diag_req *rc = NLMSG_DATA(nlh);
 	struct inet_diag_req_v2 req;
 
 	req.sdiag_family = rc->idiag_family;
@@ -943,7 +943,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
 
 static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	int hdrlen = sizeof(struct inet_diag_req_compat);
+	int hdrlen = sizeof(struct inet_diag_req);
 
 	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
 	    nlmsg_len(nlh) < hdrlen)
-- 
cgit v1.2.3


From fffe5d5aa05b4e69f79bc75a51c5ee0fc6203fa5 Mon Sep 17 00:00:00 2001
From: Qiang Liu <qiang.liu@freescale.com>
Date: Tue, 8 Nov 2011 08:43:08 -0500
Subject: mmc: sd: Macro name cleanup for high speed dtr

Add new macros for the high speed 50MHz case, rather than having
a confusing reuse of the value for UHS SDR50, which is 100MHz.

Reported-by: Aaron Lu <aaron.lu@amd.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sd.c    | 4 ++--
 include/linux/mmc/card.h | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index f2a05ea40f2a..f54392c4638a 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -307,8 +307,8 @@ static int mmc_read_switch(struct mmc_card *card)
 		goto out;
 	}
 
-	if (status[13] & UHS_SDR50_BUS_SPEED)
-		card->sw_caps.hs_max_dtr = 50000000;
+	if (status[13] & SD_MODE_HIGH_SPEED)
+		card->sw_caps.hs_max_dtr = HIGH_SPEED_MAX_DTR;
 
 	if (card->scr.sda_spec3) {
 		card->sw_caps.sd3_bus_mode = status[13];
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index c8ef9bc54d50..2c9be29684cf 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -110,6 +110,7 @@ struct sd_ssr {
 struct sd_switch_caps {
 	unsigned int		hs_max_dtr;
 	unsigned int		uhs_max_dtr;
+#define HIGH_SPEED_MAX_DTR	50000000
 #define UHS_SDR104_MAX_DTR	208000000
 #define UHS_SDR50_MAX_DTR	100000000
 #define UHS_DDR50_MAX_DTR	50000000
@@ -117,11 +118,13 @@ struct sd_switch_caps {
 #define UHS_SDR12_MAX_DTR	25000000
 	unsigned int		sd3_bus_mode;
 #define UHS_SDR12_BUS_SPEED	0
+#define HIGH_SPEED_BUS_SPEED	1
 #define UHS_SDR25_BUS_SPEED	1
 #define UHS_SDR50_BUS_SPEED	2
 #define UHS_SDR104_BUS_SPEED	3
 #define UHS_DDR50_BUS_SPEED	4
 
+#define SD_MODE_HIGH_SPEED	(1 << HIGH_SPEED_BUS_SPEED)
 #define SD_MODE_UHS_SDR12	(1 << UHS_SDR12_BUS_SPEED)
 #define SD_MODE_UHS_SDR25	(1 << UHS_SDR25_BUS_SPEED)
 #define SD_MODE_UHS_SDR50	(1 << UHS_SDR50_BUS_SPEED)
-- 
cgit v1.2.3


From df16219f365f7f5a2d88a6e123251d57255cca3f Mon Sep 17 00:00:00 2001
From: Giuseppe CAVALLARO <peppe.cavallaro@st.com>
Date: Fri, 4 Nov 2011 13:53:19 +0100
Subject: mmc: debugfs: expose the SDCLK frq in sys ios

This patch is to expose the actual SDCLK frequency in
/sys/kernel/debug/mmcX/ios entry.

For example, if the max clk for a normal speed card is 20MHz this
is reported in /sys/kernel/debug/mmcX/ios.  Unfortunately the actual
SDCLK frequency (i.e. Baseclock / divisor) is not reported at all:
for example, in that case, on Arasan HC, it should be 48/4=12 (MHz).

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/debugfs.c |  2 ++
 drivers/mmc/host/sdhci.c   | 10 ++++++++++
 include/linux/mmc/host.h   |  2 ++
 3 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 3923880118b6..027615d3bf3e 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -57,6 +57,8 @@ static int mmc_ios_show(struct seq_file *s, void *data)
 	const char *str;
 
 	seq_printf(s, "clock:\t\t%u Hz\n", ios->clock);
+	if (host->actual_clock)
+		seq_printf(s, "actual clock:\t%u Hz\n", host->actual_clock);
 	seq_printf(s, "vdd:\t\t%u ", ios->vdd);
 	if ((1 << ios->vdd) & MMC_VDD_165_195)
 		seq_printf(s, "(1.65 - 1.95 V)\n");
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 19ed580f2cab..a7c23118dab2 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1066,12 +1066,15 @@ static void sdhci_finish_command(struct sdhci_host *host)
 static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 {
 	int div = 0; /* Initialized for compiler warning */
+	int real_div = div, clk_mul = 1;
 	u16 clk = 0;
 	unsigned long timeout;
 
 	if (clock == host->clock)
 		return;
 
+	host->mmc->actual_clock = 0;
+
 	if (host->ops->set_clock) {
 		host->ops->set_clock(host, clock);
 		if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK)
@@ -1109,6 +1112,8 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 				 * Control register.
 				 */
 				clk = SDHCI_PROG_CLOCK_MODE;
+				real_div = div;
+				clk_mul = host->clk_mul;
 				div--;
 			}
 		} else {
@@ -1122,6 +1127,7 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 						break;
 				}
 			}
+			real_div = div;
 			div >>= 1;
 		}
 	} else {
@@ -1130,9 +1136,13 @@ static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 			if ((host->max_clk / div) <= clock)
 				break;
 		}
+		real_div = div;
 		div >>= 1;
 	}
 
+	if (real_div)
+		host->mmc->actual_clock = (host->max_clk * clk_mul) / real_div;
+
 	clk |= (div & SDHCI_DIV_MASK) << SDHCI_DIVIDER_SHIFT;
 	clk |= ((div & SDHCI_DIV_HI_MASK) >> SDHCI_DIV_MASK_LEN)
 		<< SDHCI_DIVIDER_HI_SHIFT;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index a3ac9c48e5de..cea064f73514 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -323,6 +323,8 @@ struct mmc_host {
 	struct fault_attr	fail_mmc_request;
 #endif
 
+	unsigned int		actual_clock;	/* Actual HC clock rate */
+
 	unsigned long		private[0] ____cacheline_aligned;
 };
 
-- 
cgit v1.2.3


From 597dd9d79cfbbb1636d00a7fd0880355d9b20c41 Mon Sep 17 00:00:00 2001
From: Sujit Reddy Thumma <sthumma@codeaurora.org>
Date: Mon, 14 Nov 2011 13:53:29 +0530
Subject: mmc: core: Use delayed work in clock gating framework

Current clock gating framework disables the MCI clock as soon as the
request is completed and enables it when a request arrives. This aggressive
clock gating framework, when enabled, causes the following issues:

When there are back-to-back requests from the Queue layer, we unnecessarily
end up disabling and enabling the clocks between these requests since 8MCLK
clock cycles is a very short duration compared to the time delay between
back-to-back requests reaching the MMC layer. This overhead can affect the
overall performance depending on how long the clock enable and disable
calls take which is platform dependent. For example on some platforms we
can have clock control not on the local processor, but on a different
subsystem and the time taken to perform the clock enable/disable can add
significant overhead.

Also if the host controller driver decides to disable the host clock too
when mmc_set_ios function is called with ios.clock=0, it adds additional
delay and it is highly possible that the next request had already arrived
and unnecessarily blocked in enabling the clocks. This is seen frequently
when the processor is executing at high speeds and in multi-core platforms
thus reduces the overall throughput compared to if clock gating is
disabled.

Fix this by deferring the clock gating: post the gating request on a
delayed workqueue. Also cancel any pending gating work that has not yet
run whenever the card is accessed.

A sysfs entry is provided to tune the delay as needed; the default
value is 200ms.

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 Documentation/mmc/mmc-dev-attrs.txt | 10 +++++++
 drivers/mmc/core/host.c             | 57 ++++++++++++++++++++++++++++++++++---
 include/linux/mmc/host.h            |  4 ++-
 3 files changed, 66 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt
index 8898a95b41e5..b0245565875a 100644
--- a/Documentation/mmc/mmc-dev-attrs.txt
+++ b/Documentation/mmc/mmc-dev-attrs.txt
@@ -64,3 +64,13 @@ Note on Erase Size and Preferred Erase Size:
 	size specified by the card.
 
 	"preferred_erase_size" is in bytes.
+
+SD/MMC/SDIO Clock Gating Attribute
+==================================
+
+Read and write access is provided to following attribute.
+This attribute appears only if CONFIG_MMC_CLKGATE is enabled.
+
+	clkgate_delay	Tune the clock gating delay with desired value in milliseconds.
+
+echo <desired delay> > /sys/class/mmc_host/mmcX/clkgate_delay
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index d31c78b72b0f..817a76039743 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -54,6 +54,31 @@ static DEFINE_IDR(mmc_host_idr);
 static DEFINE_SPINLOCK(mmc_host_lock);
 
 #ifdef CONFIG_MMC_CLKGATE
+static ssize_t clkgate_delay_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+	return snprintf(buf, PAGE_SIZE, "%lu millisecs\n",
+			host->clkgate_delay);
+}
+
+static ssize_t clkgate_delay_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+	unsigned long flags, value;
+
+	if (kstrtoul(buf, 0, &value))
+		return -EINVAL;
+
+	spin_lock_irqsave(&host->clk_lock, flags);
+	host->clkgate_delay = value;
+	spin_unlock_irqrestore(&host->clk_lock, flags);
+
+	pr_info("%s: clock gate delay set to %lu ms\n",
+			mmc_hostname(host), value);
+	return count;
+}
 
 /*
  * Enabling clock gating will make the core call out to the host
@@ -114,7 +139,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host)
 static void mmc_host_clk_gate_work(struct work_struct *work)
 {
 	struct mmc_host *host = container_of(work, struct mmc_host,
-					      clk_gate_work);
+					      clk_gate_work.work);
 
 	mmc_host_clk_gate_delayed(host);
 }
@@ -131,6 +156,8 @@ void mmc_host_clk_hold(struct mmc_host *host)
 {
 	unsigned long flags;
 
+	/* cancel any clock gating work scheduled by mmc_host_clk_release() */
+	cancel_delayed_work_sync(&host->clk_gate_work);
 	mutex_lock(&host->clk_gate_mutex);
 	spin_lock_irqsave(&host->clk_lock, flags);
 	if (host->clk_gated) {
@@ -180,7 +207,8 @@ void mmc_host_clk_release(struct mmc_host *host)
 	host->clk_requests--;
 	if (mmc_host_may_gate_card(host->card) &&
 	    !host->clk_requests)
-		queue_work(system_nrt_wq, &host->clk_gate_work);
+		queue_delayed_work(system_nrt_wq, &host->clk_gate_work,
+				msecs_to_jiffies(host->clkgate_delay));
 	spin_unlock_irqrestore(&host->clk_lock, flags);
 }
 
@@ -213,8 +241,13 @@ static inline void mmc_host_clk_init(struct mmc_host *host)
 	host->clk_requests = 0;
 	/* Hold MCI clock for 8 cycles by default */
 	host->clk_delay = 8;
+	/*
+	 * Default clock gating delay is 200ms.
+	 * This value can be tuned by writing into sysfs entry.
+	 */
+	host->clkgate_delay = 200;
 	host->clk_gated = false;
-	INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work);
+	INIT_DELAYED_WORK(&host->clk_gate_work, mmc_host_clk_gate_work);
 	spin_lock_init(&host->clk_lock);
 	mutex_init(&host->clk_gate_mutex);
 }
@@ -229,7 +262,7 @@ static inline void mmc_host_clk_exit(struct mmc_host *host)
 	 * Wait for any outstanding gate and then make sure we're
 	 * ungated before exiting.
 	 */
-	if (cancel_work_sync(&host->clk_gate_work))
+	if (cancel_delayed_work_sync(&host->clk_gate_work))
 		mmc_host_clk_gate_delayed(host);
 	if (host->clk_gated)
 		mmc_host_clk_hold(host);
@@ -237,6 +270,17 @@ static inline void mmc_host_clk_exit(struct mmc_host *host)
 	WARN_ON(host->clk_requests > 1);
 }
 
+static inline void mmc_host_clk_sysfs_init(struct mmc_host *host)
+{
+	host->clkgate_delay_attr.show = clkgate_delay_show;
+	host->clkgate_delay_attr.store = clkgate_delay_store;
+	sysfs_attr_init(&host->clkgate_delay_attr.attr);
+	host->clkgate_delay_attr.attr.name = "clkgate_delay";
+	host->clkgate_delay_attr.attr.mode = S_IRUGO | S_IWUSR;
+	if (device_create_file(&host->class_dev, &host->clkgate_delay_attr))
+		pr_err("%s: Failed to create clkgate_delay sysfs entry\n",
+				mmc_hostname(host));
+}
 #else
 
 static inline void mmc_host_clk_init(struct mmc_host *host)
@@ -247,6 +291,10 @@ static inline void mmc_host_clk_exit(struct mmc_host *host)
 {
 }
 
+static inline void mmc_host_clk_sysfs_init(struct mmc_host *host)
+{
+}
+
 #endif
 
 /**
@@ -335,6 +383,7 @@ int mmc_add_host(struct mmc_host *host)
 #ifdef CONFIG_DEBUG_FS
 	mmc_add_host_debugfs(host);
 #endif
+	mmc_host_clk_sysfs_init(host);
 
 	mmc_start_host(host);
 	register_pm_notifier(&host->pm_notify);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index cea064f73514..706f72279a17 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -253,10 +253,12 @@ struct mmc_host {
 	int			clk_requests;	/* internal reference counter */
 	unsigned int		clk_delay;	/* number of MCI clk hold cycles */
 	bool			clk_gated;	/* clock gated */
-	struct work_struct	clk_gate_work; /* delayed clock gate */
+	struct delayed_work	clk_gate_work; /* delayed clock gate */
 	unsigned int		clk_old;	/* old clock value cache */
 	spinlock_t		clk_lock;	/* lock for clk fields */
 	struct mutex		clk_gate_mutex;	/* mutex for clock gating */
+	struct device_attribute clkgate_delay_attr;
+	unsigned long           clkgate_delay;
 #endif
 
 	/* host specific block data */
-- 
cgit v1.2.3


From a303c5319c8e6ab0e744ebca118da8420043b2c3 Mon Sep 17 00:00:00 2001
From: Philip Rakity <prakity@marvell.com>
Date: Mon, 14 Nov 2011 19:14:38 -0800
Subject: mmc: sdio: support SDIO UHS cards

This patch adds support for sdio UHS cards per the version 3.0
spec.

UHS mode is only enabled for version 3.0 cards when both the
host and the controller support UHS modes.

1.8v signaling support is removed unless both the card and the
host support UHS.  This is done to maintain compatibility, because
some system/card combinations break when 1.8v signaling is
enabled while the host does not support UHS.

Signed-off-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Aaron Lu <Aaron.lu@amd.com>
Reviewed-by: Arindam Nath <arindam.nath@amd.com>
Tested-by: Bing Zhao <bzhao@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/bus.c   |   2 +-
 drivers/mmc/core/sd.c    |   2 +-
 drivers/mmc/core/sdio.c  | 329 +++++++++++++++++++++++++++++++++++++++++++----
 include/linux/mmc/card.h |   4 +-
 include/linux/mmc/sdio.h |  29 ++++-
 5 files changed, 336 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 6be49249895a..f8a228a61fd4 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -305,7 +305,7 @@ int mmc_add_card(struct mmc_card *card)
 	} else {
 		printk(KERN_INFO "%s: new %s%s%s card at address %04x\n",
 			mmc_hostname(card->host),
-			mmc_sd_card_uhs(card) ? "ultra high speed " :
+			mmc_card_uhs(card) ? "ultra high speed " :
 			(mmc_card_highspeed(card) ? "high speed " : ""),
 			mmc_card_ddr_mode(card) ? "DDR " : "",
 			type, card->rca);
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index f54392c4638a..85b858f6d5d4 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -960,7 +960,7 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 			goto free_card;
 
 		/* Card is an ultra-high-speed card */
-		mmc_sd_card_set_uhs(card);
+		mmc_card_set_uhs(card);
 
 		/*
 		 * Since initialization is now complete, enable preset
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 3ab565e32a6a..8c04f7f46dec 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -102,6 +102,7 @@ static int sdio_read_cccr(struct mmc_card *card)
 	int ret;
 	int cccr_vsn;
 	unsigned char data;
+	unsigned char speed;
 
 	memset(&card->cccr, 0, sizeof(struct sdio_cccr));
 
@@ -140,12 +141,60 @@ static int sdio_read_cccr(struct mmc_card *card)
 	}
 
 	if (cccr_vsn >= SDIO_CCCR_REV_1_20) {
-		ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_SPEED, 0, &data);
+		ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_SPEED, 0, &speed);
 		if (ret)
 			goto out;
 
-		if (data & SDIO_SPEED_SHS)
-			card->cccr.high_speed = 1;
+		card->scr.sda_spec3 = 0;
+		card->sw_caps.sd3_bus_mode = 0;
+		card->sw_caps.sd3_drv_type = 0;
+		if (cccr_vsn >= SDIO_CCCR_REV_3_00) {
+			card->scr.sda_spec3 = 1;
+			ret = mmc_io_rw_direct(card, 0, 0,
+				SDIO_CCCR_UHS, 0, &data);
+			if (ret)
+				goto out;
+
+			if (card->host->caps &
+				(MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
+				 MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 |
+				 MMC_CAP_UHS_DDR50)) {
+				if (data & SDIO_UHS_DDR50)
+					card->sw_caps.sd3_bus_mode
+						|= SD_MODE_UHS_DDR50;
+
+				if (data & SDIO_UHS_SDR50)
+					card->sw_caps.sd3_bus_mode
+						|= SD_MODE_UHS_SDR50;
+
+				if (data & SDIO_UHS_SDR104)
+					card->sw_caps.sd3_bus_mode
+						|= SD_MODE_UHS_SDR104;
+			}
+
+			ret = mmc_io_rw_direct(card, 0, 0,
+				SDIO_CCCR_DRIVE_STRENGTH, 0, &data);
+			if (ret)
+				goto out;
+
+			if (data & SDIO_DRIVE_SDTA)
+				card->sw_caps.sd3_drv_type |= SD_DRIVER_TYPE_A;
+			if (data & SDIO_DRIVE_SDTC)
+				card->sw_caps.sd3_drv_type |= SD_DRIVER_TYPE_C;
+			if (data & SDIO_DRIVE_SDTD)
+				card->sw_caps.sd3_drv_type |= SD_DRIVER_TYPE_D;
+		}
+
+		/* if no uhs mode ensure we check for high speed */
+		if (!card->sw_caps.sd3_bus_mode) {
+			if (speed & SDIO_SPEED_SHS) {
+				card->cccr.high_speed = 1;
+				card->sw_caps.hs_max_dtr = 50000000;
+			} else {
+				card->cccr.high_speed = 0;
+				card->sw_caps.hs_max_dtr = 25000000;
+			}
+		}
 	}
 
 out:
@@ -327,6 +376,193 @@ static unsigned mmc_sdio_get_max_clock(struct mmc_card *card)
 	return max_dtr;
 }
 
+static unsigned char host_drive_to_sdio_drive(int host_strength)
+{
+	switch (host_strength) {
+	case MMC_SET_DRIVER_TYPE_A:
+		return SDIO_DTSx_SET_TYPE_A;
+	case MMC_SET_DRIVER_TYPE_B:
+		return SDIO_DTSx_SET_TYPE_B;
+	case MMC_SET_DRIVER_TYPE_C:
+		return SDIO_DTSx_SET_TYPE_C;
+	case MMC_SET_DRIVER_TYPE_D:
+		return SDIO_DTSx_SET_TYPE_D;
+	default:
+		return SDIO_DTSx_SET_TYPE_B;
+	}
+}
+
+static void sdio_select_driver_type(struct mmc_card *card)
+{
+	int host_drv_type = SD_DRIVER_TYPE_B;
+	int card_drv_type = SD_DRIVER_TYPE_B;
+	int drive_strength;
+	unsigned char card_strength;
+	int err;
+
+	/*
+	 * If the host doesn't support any of the Driver Types A,C or D,
+	 * or there is no board specific handler then default Driver
+	 * Type B is used.
+	 */
+	if (!(card->host->caps &
+		(MMC_CAP_DRIVER_TYPE_A |
+		 MMC_CAP_DRIVER_TYPE_C |
+		 MMC_CAP_DRIVER_TYPE_D)))
+		return;
+
+	if (!card->host->ops->select_drive_strength)
+		return;
+
+	if (card->host->caps & MMC_CAP_DRIVER_TYPE_A)
+		host_drv_type |= SD_DRIVER_TYPE_A;
+
+	if (card->host->caps & MMC_CAP_DRIVER_TYPE_C)
+		host_drv_type |= SD_DRIVER_TYPE_C;
+
+	if (card->host->caps & MMC_CAP_DRIVER_TYPE_D)
+		host_drv_type |= SD_DRIVER_TYPE_D;
+
+	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A)
+		card_drv_type |= SD_DRIVER_TYPE_A;
+
+	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
+		card_drv_type |= SD_DRIVER_TYPE_C;
+
+	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_D)
+		card_drv_type |= SD_DRIVER_TYPE_D;
+
+	/*
+	 * The drive strength that the hardware can support
+	 * depends on the board design.  Pass the appropriate
+	 * information and let the hardware specific code
+	 * return what is possible given the options
+	 */
+	drive_strength = card->host->ops->select_drive_strength(
+		card->sw_caps.uhs_max_dtr,
+		host_drv_type, card_drv_type);
+
+	/* if error just use default for drive strength B */
+	err = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_DRIVE_STRENGTH, 0,
+		&card_strength);
+	if (err)
+		return;
+
+	card_strength &= ~(SDIO_DRIVE_DTSx_MASK<<SDIO_DRIVE_DTSx_SHIFT);
+	card_strength |= host_drive_to_sdio_drive(drive_strength);
+
+	err = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_DRIVE_STRENGTH,
+		card_strength, NULL);
+
+	/* if error default to drive strength B */
+	if (!err)
+		mmc_set_driver_type(card->host, drive_strength);
+}
+
+
+static int sdio_set_bus_speed_mode(struct mmc_card *card)
+{
+	unsigned int bus_speed, timing;
+	int err;
+	unsigned char speed;
+
+	/*
+	 * If the host doesn't support any of the UHS-I modes, fallback on
+	 * default speed.
+	 */
+	if (!(card->host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
+	    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50)))
+		return 0;
+
+	bus_speed = SDIO_SPEED_SDR12;
+	timing = MMC_TIMING_UHS_SDR12;
+	if ((card->host->caps & MMC_CAP_UHS_SDR104) &&
+	    (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)) {
+			bus_speed = SDIO_SPEED_SDR104;
+			timing = MMC_TIMING_UHS_SDR104;
+			card->sw_caps.uhs_max_dtr = UHS_SDR104_MAX_DTR;
+	} else if ((card->host->caps & MMC_CAP_UHS_DDR50) &&
+		   (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_DDR50)) {
+			bus_speed = SDIO_SPEED_DDR50;
+			timing = MMC_TIMING_UHS_DDR50;
+			card->sw_caps.uhs_max_dtr = UHS_DDR50_MAX_DTR;
+	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
+		    MMC_CAP_UHS_SDR50)) && (card->sw_caps.sd3_bus_mode &
+		    SD_MODE_UHS_SDR50)) {
+			bus_speed = SDIO_SPEED_SDR50;
+			timing = MMC_TIMING_UHS_SDR50;
+			card->sw_caps.uhs_max_dtr = UHS_SDR50_MAX_DTR;
+	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
+		    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25)) &&
+		   (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR25)) {
+			bus_speed = SDIO_SPEED_SDR25;
+			timing = MMC_TIMING_UHS_SDR25;
+			card->sw_caps.uhs_max_dtr = UHS_SDR25_MAX_DTR;
+	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
+		    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25 |
+		    MMC_CAP_UHS_SDR12)) && (card->sw_caps.sd3_bus_mode &
+		    SD_MODE_UHS_SDR12)) {
+			bus_speed = SDIO_SPEED_SDR12;
+			timing = MMC_TIMING_UHS_SDR12;
+			card->sw_caps.uhs_max_dtr = UHS_SDR12_MAX_DTR;
+	}
+
+	err = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_SPEED, 0, &speed);
+	if (err)
+		return err;
+
+	speed &= ~SDIO_SPEED_BSS_MASK;
+	speed |= bus_speed;
+	err = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_SPEED, speed, NULL);
+	if (err)
+		return err;
+
+	if (bus_speed) {
+		mmc_set_timing(card->host, timing);
+		mmc_set_clock(card->host, card->sw_caps.uhs_max_dtr);
+	}
+
+	return 0;
+}
+
+/*
+ * UHS-I specific initialization procedure
+ */
+static int mmc_sdio_init_uhs_card(struct mmc_card *card)
+{
+	int err;
+
+	if (!card->scr.sda_spec3)
+		return 0;
+
+	/*
+	 * Switch to wider bus (if supported).
+	 */
+	if (card->host->caps & MMC_CAP_4_BIT_DATA) {
+		err = sdio_enable_4bit_bus(card);
+		if (err > 0) {
+			mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
+			err = 0;
+		}
+	}
+
+	/* Set the driver strength for the card */
+	sdio_select_driver_type(card);
+
+	/* Set bus speed mode of the card */
+	err = sdio_set_bus_speed_mode(card);
+	if (err)
+		goto out;
+
+	/* Initialize and start re-tuning timer */
+	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning)
+		err = card->host->ops->execute_tuning(card->host);
+
+out:
+
+	return err;
+}
+
 /*
  * Handle the detection and initialisation of a card.
  *
@@ -393,6 +629,30 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr,
 	if (host->ops->init_card)
 		host->ops->init_card(host, card);
 
+	/*
+	 * If the host and card support UHS-I mode request the card
+	 * to switch to 1.8V signaling level.  No 1.8v signalling if
+	 * UHS mode is not enabled to maintain compatibility and some
+	 * systems that claim 1.8v signalling in fact do not support
+	 * it.
+	 */
+	if ((ocr & R4_18V_PRESENT) &&
+		(host->caps &
+			(MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
+			 MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 |
+			 MMC_CAP_UHS_DDR50))) {
+		err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180,
+				true);
+		if (err) {
+			ocr &= ~R4_18V_PRESENT;
+			host->ocr &= ~R4_18V_PRESENT;
+		}
+		err = 0;
+	} else {
+		ocr &= ~R4_18V_PRESENT;
+		host->ocr &= ~R4_18V_PRESENT;
+	}
+
 	/*
 	 * For native busses:  set card RCA and quit open drain mode.
 	 */
@@ -492,29 +752,39 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr,
 	if (err)
 		goto remove;
 
-	/*
-	 * Switch to high-speed (if supported).
-	 */
-	err = sdio_enable_hs(card);
-	if (err > 0)
-		mmc_sd_go_highspeed(card);
-	else if (err)
-		goto remove;
+	/* Initialization sequence for UHS-I cards */
+	/* Only if card supports 1.8v and UHS signaling */
+	if ((ocr & R4_18V_PRESENT) && card->sw_caps.sd3_bus_mode) {
+		err = mmc_sdio_init_uhs_card(card);
+		if (err)
+			goto remove;
 
-	/*
-	 * Change to the card's maximum speed.
-	 */
-	mmc_set_clock(host, mmc_sdio_get_max_clock(card));
+		/* Card is an ultra-high-speed card */
+		mmc_card_set_uhs(card);
+	} else {
+		/*
+		 * Switch to high-speed (if supported).
+		 */
+		err = sdio_enable_hs(card);
+		if (err > 0)
+			mmc_sd_go_highspeed(card);
+		else if (err)
+			goto remove;
 
-	/*
-	 * Switch to wider bus (if supported).
-	 */
-	err = sdio_enable_4bit_bus(card);
-	if (err > 0)
-		mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
-	else if (err)
-		goto remove;
+		/*
+		 * Change to the card's maximum speed.
+		 */
+		mmc_set_clock(host, mmc_sdio_get_max_clock(card));
 
+		/*
+		 * Switch to wider bus (if supported).
+		 */
+		err = sdio_enable_4bit_bus(card);
+		if (err > 0)
+			mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
+		else if (err)
+			goto remove;
+	}
 finish:
 	if (!oldcard)
 		host->card = card;
@@ -797,8 +1067,17 @@ int mmc_attach_sdio(struct mmc_host *host)
 	 * Detect and init the card.
 	 */
 	err = mmc_sdio_init_card(host, host->ocr, NULL, 0);
-	if (err)
-		goto err;
+	if (err) {
+		if (err == -EAGAIN) {
+			/*
+			 * Retry initialization with S18R set to 0.
+			 */
+			host->ocr &= ~R4_18V_PRESENT;
+			err = mmc_sdio_init_card(host, host->ocr, NULL, 0);
+		}
+		if (err)
+			goto err;
+	}
 	card = host->card;
 
 	/*
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 2c9be29684cf..534974c3ef0c 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -367,7 +367,8 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_highspeed(c)	((c)->state & MMC_STATE_HIGHSPEED)
 #define mmc_card_blockaddr(c)	((c)->state & MMC_STATE_BLOCKADDR)
 #define mmc_card_ddr_mode(c)	((c)->state & MMC_STATE_HIGHSPEED_DDR)
-#define mmc_sd_card_uhs(c) ((c)->state & MMC_STATE_ULTRAHIGHSPEED)
+#define mmc_card_uhs(c)		((c)->state & MMC_STATE_ULTRAHIGHSPEED)
+#define mmc_sd_card_uhs(c)	((c)->state & MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
 
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
@@ -375,6 +376,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED)
 #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
 #define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR)
+#define mmc_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
 
diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h
index e0b1123497b9..c9fe66c58f8f 100644
--- a/include/linux/mmc/sdio.h
+++ b/include/linux/mmc/sdio.h
@@ -38,6 +38,7 @@
  *      [8:0] Byte/block count
  */
 
+#define R4_18V_PRESENT (1<<24)
 #define R4_MEMORY_PRESENT (1 << 27)
 
 /*
@@ -85,6 +86,7 @@
 #define  SDIO_SD_REV_1_01	0	/* SD Physical Spec Version 1.01 */
 #define  SDIO_SD_REV_1_10	1	/* SD Physical Spec Version 1.10 */
 #define  SDIO_SD_REV_2_00	2	/* SD Physical Spec Version 2.00 */
+#define  SDIO_SD_REV_3_00	3	/* SD Physical Spec Version 3.00 */
 
 #define SDIO_CCCR_IOEx		0x02
 #define SDIO_CCCR_IORx		0x03
@@ -134,8 +136,31 @@
 #define SDIO_CCCR_SPEED		0x13
 
 #define  SDIO_SPEED_SHS		0x01	/* Supports High-Speed mode */
-#define  SDIO_SPEED_EHS		0x02	/* Enable High-Speed mode */
-
+#define  SDIO_SPEED_BSS_SHIFT	1
+#define  SDIO_SPEED_BSS_MASK	(7<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_SDR12	(0<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_SDR25	(1<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_SDR50	(2<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_SDR104	(3<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_DDR50	(4<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_EHS		SDIO_SPEED_SDR25	/* Enable High-Speed */
+
+#define SDIO_CCCR_UHS		0x14
+#define  SDIO_UHS_SDR50		0x01
+#define  SDIO_UHS_SDR104	0x02
+#define  SDIO_UHS_DDR50		0x04
+
+#define SDIO_CCCR_DRIVE_STRENGTH 0x15
+#define  SDIO_SDTx_MASK		0x07
+#define  SDIO_DRIVE_SDTA	(1<<0)
+#define  SDIO_DRIVE_SDTC	(1<<1)
+#define  SDIO_DRIVE_SDTD	(1<<2)
+#define  SDIO_DRIVE_DTSx_MASK	0x03
+#define  SDIO_DRIVE_DTSx_SHIFT	4
+#define  SDIO_DTSx_SET_TYPE_B	(0 << SDIO_DRIVE_DTSx_SHIFT)
+#define  SDIO_DTSx_SET_TYPE_A	(1 << SDIO_DRIVE_DTSx_SHIFT)
+#define  SDIO_DTSx_SET_TYPE_C	(2 << SDIO_DRIVE_DTSx_SHIFT)
+#define  SDIO_DTSx_SET_TYPE_D	(3 << SDIO_DRIVE_DTSx_SHIFT)
 /*
  * Function Basic Registers (FBR)
  */
-- 
cgit v1.2.3


From 5a09262744a0b84719b933ac66801de058776755 Mon Sep 17 00:00:00 2001
From: Per Forlin <per.forlin@stericsson.com>
Date: Mon, 14 Nov 2011 12:02:28 +0100
Subject: mmc: mmci: add capabilities2 for MMC_CAP2

Signed-off-by: Per Forlin <per.forlin@stericsson.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/mmci.c   | 1 +
 include/linux/amba/mmci.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 50b5f9926f64..8eabf999a858 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -1239,6 +1239,7 @@ static int __devinit mmci_probe(struct amba_device *dev,
 	if (host->vcc == NULL)
 		mmc->ocr_avail = plat->ocr_mask;
 	mmc->caps = plat->capabilities;
+	mmc->caps2 = plat->capabilities2;
 
 	/*
 	 * We can do SGIO
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index 21114810c7c0..0101e9c17fa1 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -30,6 +30,7 @@ struct dma_chan;
  * @cd_invert: true if the gpio_cd pin value is active low
  * @capabilities: the capabilities of the block as implemented in
  * this platform, signify anything MMC_CAP_* from mmc/host.h
+ * @capabilities2: more capabilities, MMC_CAP2_* from mmc/host.h
  * @dma_filter: function used to select an appropriate RX and TX
  * DMA channel to be used for DMA, if and only if you're deploying the
  * generic DMA engine
@@ -52,6 +53,7 @@ struct mmci_platform_data {
 	int	gpio_cd;
 	bool	cd_invert;
 	unsigned long capabilities;
+	unsigned long capabilities2;
 	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
 	void *dma_rx_param;
 	void *dma_tx_param;
-- 
cgit v1.2.3


From d30495048892980e5d453328d1cc9343b3f7e917 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 28 Nov 2011 16:22:00 +0200
Subject: mmc: allow upper layers to know immediately if card has been removed

Add a function mmc_detect_card_removed() which upper layers can use to
determine immediately if a card has been removed. This function should
be called after an I/O request fails so that all queued I/O requests
can be errored out immediately instead of waiting for the card device
to be removed.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  | 51 +++++++++++++++++++++++++++++++++++++++++++++---
 drivers/mmc/core/core.h  |  3 +++
 drivers/mmc/core/mmc.c   | 12 +++++++++++-
 drivers/mmc/core/sd.c    | 12 +++++++++++-
 drivers/mmc/core/sdio.c  | 11 ++++++++++-
 include/linux/mmc/card.h |  3 +++
 include/linux/mmc/core.h |  2 ++
 include/linux/mmc/host.h |  1 +
 8 files changed, 89 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 950b97d7412a..a2aa860956ef 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -140,7 +140,7 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
 			cmd->retries = 0;
 	}
 
-	if (err && cmd->retries) {
+	if (err && cmd->retries && !mmc_card_removed(host->card)) {
 		/*
 		 * Request starter must handle retries - see
 		 * mmc_wait_for_req_done().
@@ -247,6 +247,11 @@ static void __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq)
 {
 	init_completion(&mrq->completion);
 	mrq->done = mmc_wait_done;
+	if (mmc_card_removed(host->card)) {
+		mrq->cmd->error = -ENOMEDIUM;
+		complete(&mrq->completion);
+		return;
+	}
 	mmc_start_request(host, mrq);
 }
 
@@ -259,7 +264,8 @@ static void mmc_wait_for_req_done(struct mmc_host *host,
 		wait_for_completion(&mrq->completion);
 
 		cmd = mrq->cmd;
-		if (!cmd->error || !cmd->retries)
+		if (!cmd->error || !cmd->retries ||
+		    mmc_card_removed(host->card))
 			break;
 
 		pr_debug("%s: req failed (CMD%u): %d, retrying...\n",
@@ -1456,7 +1462,7 @@ void mmc_detect_change(struct mmc_host *host, unsigned long delay)
 	WARN_ON(host->removed);
 	spin_unlock_irqrestore(&host->lock, flags);
 #endif
-
+	host->detect_change = 1;
 	mmc_schedule_delayed_work(&host->detect, delay);
 }
 
@@ -2049,6 +2055,43 @@ static int mmc_rescan_try_freq(struct mmc_host *host, unsigned freq)
 	return -EIO;
 }
 
+int _mmc_detect_card_removed(struct mmc_host *host)
+{
+	int ret;
+
+	if ((host->caps & MMC_CAP_NONREMOVABLE) || !host->bus_ops->alive)
+		return 0;
+
+	if (!host->card || mmc_card_removed(host->card))
+		return 1;
+
+	ret = host->bus_ops->alive(host);
+	if (ret) {
+		mmc_card_set_removed(host->card);
+		pr_debug("%s: card remove detected\n", mmc_hostname(host));
+	}
+
+	return ret;
+}
+
+int mmc_detect_card_removed(struct mmc_host *host)
+{
+	struct mmc_card *card = host->card;
+
+	WARN_ON(!host->claimed);
+	/*
+	 * The card will be considered unchanged unless we have been asked to
+	 * detect a change or host requires polling to provide card detection.
+	 */
+	if (card && !host->detect_change && !(host->caps & MMC_CAP_NEEDS_POLL))
+		return mmc_card_removed(card);
+
+	host->detect_change = 0;
+
+	return _mmc_detect_card_removed(host);
+}
+EXPORT_SYMBOL(mmc_detect_card_removed);
+
 void mmc_rescan(struct work_struct *work)
 {
 	static const unsigned freqs[] = { 400000, 300000, 200000, 100000 };
@@ -2069,6 +2112,8 @@ void mmc_rescan(struct work_struct *work)
 	    && !(host->caps & MMC_CAP_NONREMOVABLE))
 		host->bus_ops->detect(host);
 
+	host->detect_change = 0;
+
 	/*
 	 * Let mmc_bus_put() free the bus/bus_ops if we've found that
 	 * the card is no longer present.
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 14664f1fb16f..34009241213c 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -24,6 +24,7 @@ struct mmc_bus_ops {
 	int (*resume)(struct mmc_host *);
 	int (*power_save)(struct mmc_host *);
 	int (*power_restore)(struct mmc_host *);
+	int (*alive)(struct mmc_host *);
 };
 
 void mmc_attach_bus(struct mmc_host *host, const struct mmc_bus_ops *ops);
@@ -59,6 +60,8 @@ void mmc_rescan(struct work_struct *work);
 void mmc_start_host(struct mmc_host *host);
 void mmc_stop_host(struct mmc_host *host);
 
+int _mmc_detect_card_removed(struct mmc_host *host);
+
 int mmc_attach_mmc(struct mmc_host *host);
 int mmc_attach_sd(struct mmc_host *host);
 int mmc_attach_sdio(struct mmc_host *host);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index d240427c1246..fc1059bb6a08 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1104,6 +1104,14 @@ static void mmc_remove(struct mmc_host *host)
 	host->card = NULL;
 }
 
+/*
+ * Card detection - card is alive.
+ */
+static int mmc_alive(struct mmc_host *host)
+{
+	return mmc_send_status(host->card, NULL);
+}
+
 /*
  * Card detection callback from host.
  */
@@ -1119,7 +1127,7 @@ static void mmc_detect(struct mmc_host *host)
 	/*
 	 * Just check if our card has been removed.
 	 */
-	err = mmc_send_status(host->card, NULL);
+	err = _mmc_detect_card_removed(host);
 
 	mmc_release_host(host);
 
@@ -1224,6 +1232,7 @@ static const struct mmc_bus_ops mmc_ops = {
 	.suspend = NULL,
 	.resume = NULL,
 	.power_restore = mmc_power_restore,
+	.alive = mmc_alive,
 };
 
 static const struct mmc_bus_ops mmc_ops_unsafe = {
@@ -1234,6 +1243,7 @@ static const struct mmc_bus_ops mmc_ops_unsafe = {
 	.suspend = mmc_suspend,
 	.resume = mmc_resume,
 	.power_restore = mmc_power_restore,
+	.alive = mmc_alive,
 };
 
 static void mmc_attach_bus_ops(struct mmc_host *host)
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 85b858f6d5d4..6f27d35081b8 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -1018,6 +1018,14 @@ static void mmc_sd_remove(struct mmc_host *host)
 	host->card = NULL;
 }
 
+/*
+ * Card detection - card is alive.
+ */
+static int mmc_sd_alive(struct mmc_host *host)
+{
+	return mmc_send_status(host->card, NULL);
+}
+
 /*
  * Card detection callback from host.
  */
@@ -1033,7 +1041,7 @@ static void mmc_sd_detect(struct mmc_host *host)
 	/*
 	 * Just check if our card has been removed.
 	 */
-	err = mmc_send_status(host->card, NULL);
+	err = _mmc_detect_card_removed(host);
 
 	mmc_release_host(host);
 
@@ -1102,6 +1110,7 @@ static const struct mmc_bus_ops mmc_sd_ops = {
 	.suspend = NULL,
 	.resume = NULL,
 	.power_restore = mmc_sd_power_restore,
+	.alive = mmc_sd_alive,
 };
 
 static const struct mmc_bus_ops mmc_sd_ops_unsafe = {
@@ -1110,6 +1119,7 @@ static const struct mmc_bus_ops mmc_sd_ops_unsafe = {
 	.suspend = mmc_sd_suspend,
 	.resume = mmc_sd_resume,
 	.power_restore = mmc_sd_power_restore,
+	.alive = mmc_sd_alive,
 };
 
 static void mmc_sd_attach_bus_ops(struct mmc_host *host)
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 8c04f7f46dec..b77f770ce5d1 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -819,6 +819,14 @@ static void mmc_sdio_remove(struct mmc_host *host)
 	host->card = NULL;
 }
 
+/*
+ * Card detection - card is alive.
+ */
+static int mmc_sdio_alive(struct mmc_host *host)
+{
+	return mmc_select_card(host->card);
+}
+
 /*
  * Card detection callback from host.
  */
@@ -841,7 +849,7 @@ static void mmc_sdio_detect(struct mmc_host *host)
 	/*
 	 * Just check if our card has been removed.
 	 */
-	err = mmc_select_card(host->card);
+	err = _mmc_detect_card_removed(host);
 
 	mmc_release_host(host);
 
@@ -1019,6 +1027,7 @@ static const struct mmc_bus_ops mmc_sdio_ops = {
 	.suspend = mmc_sdio_suspend,
 	.resume = mmc_sdio_resume,
 	.power_restore = mmc_sdio_power_restore,
+	.alive = mmc_sdio_alive,
 };
 
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 534974c3ef0c..6402d9224d6a 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -209,6 +209,7 @@ struct mmc_card {
 #define MMC_STATE_HIGHSPEED_DDR (1<<4)		/* card is in high speed mode */
 #define MMC_STATE_ULTRAHIGHSPEED (1<<5)		/* card is in ultra high speed mode */
 #define MMC_CARD_SDXC		(1<<6)		/* card is SDXC */
+#define MMC_CARD_REMOVED	(1<<7)		/* card has been removed */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -370,6 +371,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_uhs(c)		((c)->state & MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_sd_card_uhs(c)	((c)->state & MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
+#define mmc_card_removed(c)	((c) && ((c)->state & MMC_CARD_REMOVED))
 
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
 #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
@@ -379,6 +381,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
+#define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED)
 
 /*
  * Quirk add/remove for MMC products.
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 174a844a5dda..87a976cc5654 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -180,6 +180,8 @@ extern int mmc_try_claim_host(struct mmc_host *host);
 
 extern int mmc_flush_cache(struct mmc_card *);
 
+extern int mmc_detect_card_removed(struct mmc_host *host);
+
 /**
  *	mmc_claim_host - exclusively claim a host
  *	@host: mmc host to claim
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 706f72279a17..9a03d0335745 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -299,6 +299,7 @@ struct mmc_host {
 	int			claim_cnt;	/* "claim" nesting count */
 
 	struct delayed_work	detect;
+	int			detect_change;	/* card detect flag */
 
 	const struct mmc_bus_ops *bus_ops;	/* current bus driver */
 	unsigned int		bus_refs;	/* reference counter */
-- 
cgit v1.2.3


From add710eaa88606de8ba98a014d37178579e6dbaf Mon Sep 17 00:00:00 2001
From: Johan Rudholm <johan.rudholm@stericsson.com>
Date: Fri, 2 Dec 2011 08:51:06 +0100
Subject: mmc: boot partition ro lock support

Enable boot partitions to be read-only locked until next power on via
a sysfs entry. There will be one sysfs entry for each boot partition:

/sys/block/mmcblkXbootY/ro_lock_until_next_power_on

Each boot partition is locked by writing 1 to its file.

Signed-off-by: Johan Rudholm <johan.rudholm@stericsson.com>
Signed-off-by: John Beckett <john.beckett@stericsson.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 Documentation/mmc/mmc-dev-parts.txt |  13 ++++
 drivers/mmc/card/block.c            | 121 +++++++++++++++++++++++++++++++++---
 drivers/mmc/core/mmc.c              |  14 ++++-
 include/linux/mmc/card.h            |  10 ++-
 include/linux/mmc/mmc.h             |   6 ++
 5 files changed, 153 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/mmc/mmc-dev-parts.txt b/Documentation/mmc/mmc-dev-parts.txt
index 2db28b8e662f..f08d078d43cf 100644
--- a/Documentation/mmc/mmc-dev-parts.txt
+++ b/Documentation/mmc/mmc-dev-parts.txt
@@ -25,3 +25,16 @@ echo 0 > /sys/block/mmcblkXbootY/force_ro
 To re-enable read-only access:
 
 echo 1 > /sys/block/mmcblkXbootY/force_ro
+
+The boot partitions can also be locked read only until the next power on,
+with:
+
+echo 1 > /sys/block/mmcblkXbootY/ro_lock_until_next_power_on
+
+This is a feature of the card and not of the kernel. If the card does
+not support boot partition locking, the file will not exist. If the
+feature has been disabled on the card, the file will be read-only.
+
+The boot partitions can also be locked permanently, but this feature is
+not accessible through sysfs in order to avoid accidental or malicious
+bricking.
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index ad0fb8d74dda..0c959c96005e 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -107,6 +107,8 @@ struct mmc_blk_data {
 	 */
 	unsigned int	part_curr;
 	struct device_attribute force_ro;
+	struct device_attribute power_ro_lock;
+	int	area_type;
 };
 
 static DEFINE_MUTEX(open_lock);
@@ -165,6 +167,70 @@ static void mmc_blk_put(struct mmc_blk_data *md)
 	mutex_unlock(&open_lock);
 }
 
+static ssize_t power_ro_lock_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	int ret;
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	struct mmc_card *card = md->queue.card;
+	int locked = 0;
+
+	if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PERM_WP_EN)
+		locked = 2;
+	else if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PWR_WP_EN)
+		locked = 1;
+
+	ret = snprintf(buf, PAGE_SIZE, "%d\n", locked);
+
+	return ret;
+}
+
+static ssize_t power_ro_lock_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	struct mmc_blk_data *md, *part_md;
+	struct mmc_card *card;
+	unsigned long set;
+
+	if (kstrtoul(buf, 0, &set))
+		return -EINVAL;
+
+	if (set != 1)
+		return count;
+
+	md = mmc_blk_get(dev_to_disk(dev));
+	card = md->queue.card;
+
+	mmc_claim_host(card->host);
+
+	ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_WP,
+				card->ext_csd.boot_ro_lock |
+				EXT_CSD_BOOT_WP_B_PWR_WP_EN,
+				card->ext_csd.part_time);
+	if (ret)
+		pr_err("%s: Locking boot partition ro until next power on failed: %d\n", md->disk->disk_name, ret);
+	else
+		card->ext_csd.boot_ro_lock |= EXT_CSD_BOOT_WP_B_PWR_WP_EN;
+
+	mmc_release_host(card->host);
+
+	if (!ret) {
+		pr_info("%s: Locking boot partition ro until next power on\n",
+			md->disk->disk_name);
+		set_disk_ro(md->disk, 1);
+
+		list_for_each_entry(part_md, &md->part, part)
+			if (part_md->area_type == MMC_BLK_DATA_AREA_BOOT) {
+				pr_info("%s: Locking boot partition ro until next power on\n", part_md->disk->disk_name);
+				set_disk_ro(part_md->disk, 1);
+			}
+	}
+
+	mmc_blk_put(md);
+	return count;
+}
+
 static ssize_t force_ro_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
@@ -1347,7 +1413,8 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
 					      struct device *parent,
 					      sector_t size,
 					      bool default_ro,
-					      const char *subname)
+					      const char *subname,
+					      int area_type)
 {
 	struct mmc_blk_data *md;
 	int devidx, ret;
@@ -1372,11 +1439,12 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
 	if (!subname) {
 		md->name_idx = find_first_zero_bit(name_use, max_devices);
 		__set_bit(md->name_idx, name_use);
-	}
-	else
+	} else
 		md->name_idx = ((struct mmc_blk_data *)
 				dev_to_disk(parent)->private_data)->name_idx;
 
+	md->area_type = area_type;
+
 	/*
 	 * Set the read-only status based on the supported commands
 	 * and the write protect switch.
@@ -1470,7 +1538,8 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
 		size = card->csd.capacity << (card->csd.read_blkbits - 9);
 	}
 
-	md = mmc_blk_alloc_req(card, &card->dev, size, false, NULL);
+	md = mmc_blk_alloc_req(card, &card->dev, size, false, NULL,
+					MMC_BLK_DATA_AREA_MAIN);
 	return md;
 }
 
@@ -1479,13 +1548,14 @@ static int mmc_blk_alloc_part(struct mmc_card *card,
 			      unsigned int part_type,
 			      sector_t size,
 			      bool default_ro,
-			      const char *subname)
+			      const char *subname,
+			      int area_type)
 {
 	char cap_str[10];
 	struct mmc_blk_data *part_md;
 
 	part_md = mmc_blk_alloc_req(card, disk_to_dev(md->disk), size, default_ro,
-				    subname);
+				    subname, area_type);
 	if (IS_ERR(part_md))
 		return PTR_ERR(part_md);
 	part_md->part_type = part_type;
@@ -1518,7 +1588,8 @@ static int mmc_blk_alloc_parts(struct mmc_card *card, struct mmc_blk_data *md)
 				card->part[idx].part_cfg,
 				card->part[idx].size >> 9,
 				card->part[idx].force_ro,
-				card->part[idx].name);
+				card->part[idx].name,
+				card->part[idx].area_type);
 			if (ret)
 				return ret;
 		}
@@ -1547,9 +1618,16 @@ mmc_blk_set_blksize(struct mmc_blk_data *md, struct mmc_card *card)
 
 static void mmc_blk_remove_req(struct mmc_blk_data *md)
 {
+	struct mmc_card *card;
+
 	if (md) {
+		card = md->queue.card;
 		if (md->disk->flags & GENHD_FL_UP) {
 			device_remove_file(disk_to_dev(md->disk), &md->force_ro);
+			if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
+					card->ext_csd.boot_ro_lockable)
+				device_remove_file(disk_to_dev(md->disk),
+					&md->power_ro_lock);
 
 			/* Stop new requests from getting into the queue */
 			del_gendisk(md->disk);
@@ -1578,6 +1656,7 @@ static void mmc_blk_remove_parts(struct mmc_card *card,
 static int mmc_add_disk(struct mmc_blk_data *md)
 {
 	int ret;
+	struct mmc_card *card = md->queue.card;
 
 	add_disk(md->disk);
 	md->force_ro.show = force_ro_show;
@@ -1587,7 +1666,33 @@ static int mmc_add_disk(struct mmc_blk_data *md)
 	md->force_ro.attr.mode = S_IRUGO | S_IWUSR;
 	ret = device_create_file(disk_to_dev(md->disk), &md->force_ro);
 	if (ret)
-		del_gendisk(md->disk);
+		goto force_ro_fail;
+
+	if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
+	     card->ext_csd.boot_ro_lockable) {
+		mode_t mode;
+
+		if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PWR_WP_DIS)
+			mode = S_IRUGO;
+		else
+			mode = S_IRUGO | S_IWUSR;
+
+		md->power_ro_lock.show = power_ro_lock_show;
+		md->power_ro_lock.store = power_ro_lock_store;
+		md->power_ro_lock.attr.mode = mode;
+		md->power_ro_lock.attr.name =
+					"ro_lock_until_next_power_on";
+		ret = device_create_file(disk_to_dev(md->disk),
+				&md->power_ro_lock);
+		if (ret)
+			goto power_ro_lock_fail;
+	}
+	return ret;
+
+power_ro_lock_fail:
+	device_remove_file(disk_to_dev(md->disk), &md->force_ro);
+force_ro_fail:
+	del_gendisk(md->disk);
 
 	return ret;
 }
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index fc1059bb6a08..006e932a3ae3 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -348,7 +348,8 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 				part_size = ext_csd[EXT_CSD_BOOT_MULT] << 17;
 				mmc_part_add(card, part_size,
 					EXT_CSD_PART_CONFIG_ACC_BOOT0 + idx,
-					"boot%d", idx, true);
+					"boot%d", idx, true,
+					MMC_BLK_DATA_AREA_BOOT);
 			}
 		}
 	}
@@ -435,7 +436,8 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 					hc_wp_grp_sz);
 				mmc_part_add(card, part_size << 19,
 					EXT_CSD_PART_CONFIG_ACC_GP0 + idx,
-					"gp%d", idx, false);
+					"gp%d", idx, false,
+					MMC_BLK_DATA_AREA_GP);
 			}
 		}
 		card->ext_csd.sec_trim_mult =
@@ -446,6 +448,14 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 			ext_csd[EXT_CSD_SEC_FEATURE_SUPPORT];
 		card->ext_csd.trim_timeout = 300 *
 			ext_csd[EXT_CSD_TRIM_MULT];
+
+		/*
+		 * Note that the call to mmc_part_add above defaults to read
+		 * only. If this default assumption is changed, the call must
+		 * take into account the value of boot_ro_lock below.
+		 */
+		card->ext_csd.boot_ro_lock = ext_csd[EXT_CSD_BOOT_WP];
+		card->ext_csd.boot_ro_lockable = true;
 	}
 
 	if (card->ext_csd.rev >= 5) {
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 6402d9224d6a..9478a6bf1bb1 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -71,6 +71,8 @@ struct mmc_ext_csd {
 	bool			hpi_en;			/* HPI enablebit */
 	bool			hpi;			/* HPI support bit */
 	unsigned int		hpi_cmd;		/* cmd used as HPI */
+	unsigned int		boot_ro_lock;		/* ro lock support */
+	bool			boot_ro_lockable;
 	u8			raw_partition_support;	/* 160 */
 	u8			raw_erased_mem_count;	/* 181 */
 	u8			raw_ext_csd_structure;	/* 194 */
@@ -187,6 +189,10 @@ struct mmc_part {
 	unsigned int	part_cfg;	/* partition type */
 	char	name[MAX_MMC_PART_NAME_LEN];
 	bool	force_ro;	/* to make boot parts RO by default */
+	unsigned int	area_type;
+#define MMC_BLK_DATA_AREA_MAIN	(1<<0)
+#define MMC_BLK_DATA_AREA_BOOT	(1<<1)
+#define MMC_BLK_DATA_AREA_GP	(1<<2)
 };
 
 /*
@@ -265,12 +271,14 @@ struct mmc_card {
  * This function fill contents in mmc_part.
  */
 static inline void mmc_part_add(struct mmc_card *card, unsigned int size,
-			unsigned int part_cfg, char *name, int idx, bool ro)
+			unsigned int part_cfg, char *name, int idx, bool ro,
+			int area_type)
 {
 	card->part[card->nr_parts].size = size;
 	card->part[card->nr_parts].part_cfg = part_cfg;
 	sprintf(card->part[card->nr_parts].name, name, idx);
 	card->part[card->nr_parts].force_ro = ro;
+	card->part[card->nr_parts].area_type = area_type;
 	card->nr_parts++;
 }
 
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 0e7135697d11..665548e639e8 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -280,6 +280,7 @@ struct _mmc_csd {
 #define EXT_CSD_RST_N_FUNCTION		162	/* R/W */
 #define EXT_CSD_SANITIZE_START		165     /* W */
 #define EXT_CSD_WR_REL_PARAM		166	/* RO */
+#define EXT_CSD_BOOT_WP			173	/* R/W */
 #define EXT_CSD_ERASE_GROUP_DEF		175	/* R/W */
 #define EXT_CSD_PART_CONFIG		179	/* R/W */
 #define EXT_CSD_ERASED_MEM_CONT		181	/* RO */
@@ -321,6 +322,11 @@ struct _mmc_csd {
 
 #define EXT_CSD_WR_REL_PARAM_EN		(1<<2)
 
+#define EXT_CSD_BOOT_WP_B_PWR_WP_DIS	(0x40)
+#define EXT_CSD_BOOT_WP_B_PERM_WP_DIS	(0x10)
+#define EXT_CSD_BOOT_WP_B_PERM_WP_EN	(0x04)
+#define EXT_CSD_BOOT_WP_B_PWR_WP_EN	(0x01)
+
 #define EXT_CSD_PART_CONFIG_ACC_MASK	(0x7)
 #define EXT_CSD_PART_CONFIG_ACC_BOOT0	(0x1)
 #define EXT_CSD_PART_CONFIG_ACC_GP0	(0x4)
-- 
cgit v1.2.3


From 4f408cc67a0613f969d1e02fff6de74d31a29fb3 Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Fri, 9 Dec 2011 14:55:52 +0900
Subject: mmc: dw_mmc: Add more capabilities field

This patch adds another capabilities field for MMC_CAP2_XXX.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/dw_mmc.c  | 6 ++++++
 include/linux/mmc/dw_mmc.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 44bc11e8761e..69e588960e79 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -1681,6 +1681,12 @@ static int __init dw_mci_init_slot(struct dw_mci *host, unsigned int id)
 	else
 		mmc->caps = 0;
 
+	if (host->pdata->caps2)
+		mmc->caps2 = host->pdata->caps2;
+	else
+		mmc->caps2 = 0;
+
+
 	if (host->pdata->get_bus_wd)
 		if (host->pdata->get_bus_wd(slot->id) >= 4)
 			mmc->caps |= MMC_CAP_4_BIT_DATA;
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 6dc9b80568a0..e8779c6d1759 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -214,6 +214,7 @@ struct dw_mci_board {
 	unsigned int bus_hz; /* Bus speed */
 
 	unsigned int caps;	/* Capabilities */
+	unsigned int caps2;	/* More capabilities */
 	/*
 	 * Override fifo depth. If 0, autodetect it from the FIFOTH register,
 	 * but note that this may not be reliable after a bootloader has used
-- 
cgit v1.2.3


From b67e198073b2d2f16572f5fa77553fec14775f69 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Sun, 25 Dec 2011 20:40:03 -0500
Subject: mmc: add a card hotplug handler context

SD/MMC controllers provide different card insertion and removal detection
methods. On some of them the controller itself issues an interrupt, on
others polling is used, on yet others auxiliary means are used for this
purpose, e.g., a GPIO IRQ. Further, on some systems one of those methods
can be chosen at driver probing time and configured in software. E.g., on
some systems the SD/MMC controller card hot-plug detection pin can be
configured either as a respective controller function, or as an IRQ-capable
GPIO. To support such flexible configurations a card hot-plug context
is added by this patch.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 include/linux/mmc/host.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 9a03d0335745..742f0e102e1e 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -167,6 +167,11 @@ struct mmc_async_req {
 	int (*err_check) (struct mmc_card *, struct mmc_async_req *);
 };
 
+struct mmc_hotplug {
+	unsigned int irq;
+	void *handler_priv;
+};
+
 struct mmc_host {
 	struct device		*parent;
 	struct device		class_dev;
@@ -300,6 +305,7 @@ struct mmc_host {
 
 	struct delayed_work	detect;
 	int			detect_change;	/* card detect flag */
+	struct mmc_hotplug	hotplug;
 
 	const struct mmc_bus_ops *bus_ops;	/* current bus driver */
 	unsigned int		bus_refs;	/* reference counter */
-- 
cgit v1.2.3


From 349ab52446772a359bc7e7699cae3880d48fa5c9 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Sun, 25 Dec 2011 21:36:02 +0100
Subject: mmc: add a generic GPIO card-detect helper

This patch adds a primitive helper to support card hotplug detection on
platforms, where a GPIO, capable of producing interrupts, is used for
detection of card-insertion and -removal events.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/Makefile   |  2 +-
 drivers/mmc/core/cd-gpio.c  | 74 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mmc/cd-gpio.h | 19 ++++++++++++
 3 files changed, 94 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mmc/core/cd-gpio.c
 create mode 100644 include/linux/mmc/cd-gpio.h

(limited to 'include/linux')

diff --git a/drivers/mmc/core/Makefile b/drivers/mmc/core/Makefile
index 639501970b41..dca4428380f1 100644
--- a/drivers/mmc/core/Makefile
+++ b/drivers/mmc/core/Makefile
@@ -7,6 +7,6 @@ mmc_core-y			:= core.o bus.o host.o \
 				   mmc.o mmc_ops.o sd.o sd_ops.o \
 				   sdio.o sdio_ops.o sdio_bus.o \
 				   sdio_cis.o sdio_io.o sdio_irq.o \
-				   quirks.o
+				   quirks.o cd-gpio.o
 
 mmc_core-$(CONFIG_DEBUG_FS)	+= debugfs.o
diff --git a/drivers/mmc/core/cd-gpio.c b/drivers/mmc/core/cd-gpio.c
new file mode 100644
index 000000000000..082202ae4a03
--- /dev/null
+++ b/drivers/mmc/core/cd-gpio.c
@@ -0,0 +1,74 @@
+/*
+ * Generic GPIO card-detect helper
+ *
+ * Copyright (C) 2011, Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/mmc/host.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+struct mmc_cd_gpio {
+	unsigned int gpio;	/* GPIO number, released again in mmc_cd_gpio_free() */
+	char label[0];		/* "<parent device name> cd", allocated together with the struct */
+};
+
+static irqreturn_t mmc_cd_gpio_irqt(int irq, void *dev_id)
+{
+	/* dev_id is the mmc_host; schedule card detection after a 100 ms debounce */
+	mmc_detect_change(dev_id, msecs_to_jiffies(100));
+	return IRQ_HANDLED;
+}
+
+int mmc_cd_gpio_request(struct mmc_host *host, unsigned int gpio,
+			unsigned int irq, unsigned long flags)
+{
+	size_t len = strlen(dev_name(host->parent)) + 4; /* name + " cd" + NUL */
+	struct mmc_cd_gpio *cd = kmalloc(sizeof(*cd) + len, GFP_KERNEL); /* struct + trailing label */
+	int ret;
+
+	if (!cd)
+		return -ENOMEM;
+
+	snprintf(cd->label, len, "%s cd", dev_name(host->parent));
+
+	ret = gpio_request_one(gpio, GPIOF_DIR_IN, cd->label);
+	if (ret < 0)
+		goto egpioreq;
+
+	ret = request_threaded_irq(irq, NULL, mmc_cd_gpio_irqt,
+				   flags, cd->label, host); /* host becomes the handler's dev_id */
+	if (ret < 0)
+		goto eirqreq;
+
+	cd->gpio = gpio;
+	host->hotplug.irq = irq; /* both fields are read back by mmc_cd_gpio_free() */
+	host->hotplug.handler_priv = cd;
+
+	return 0;
+
+eirqreq: /* IRQ request failed: release the GPIO obtained above */
+	gpio_free(gpio);
+egpioreq: /* GPIO request failed: only the allocation has to be undone */
+	kfree(cd);
+	return ret;
+}
+EXPORT_SYMBOL(mmc_cd_gpio_request);
+
+void mmc_cd_gpio_free(struct mmc_host *host)
+{
+	struct mmc_cd_gpio *cd = host->hotplug.handler_priv; /* stored by mmc_cd_gpio_request() */
+
+	free_irq(host->hotplug.irq, host); /* same irq/dev_id pair used at request time */
+	gpio_free(cd->gpio);
+	kfree(cd); /* also frees the label that lives inside the same allocation */
+}
+EXPORT_SYMBOL(mmc_cd_gpio_free);
diff --git a/include/linux/mmc/cd-gpio.h b/include/linux/mmc/cd-gpio.h
new file mode 100644
index 000000000000..a8e469783318
--- /dev/null
+++ b/include/linux/mmc/cd-gpio.h
@@ -0,0 +1,19 @@
+/*
+ * Generic GPIO card-detect helper header
+ *
+ * Copyright (C) 2011, Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MMC_CD_GPIO_H
+#define MMC_CD_GPIO_H
+
+struct mmc_host;
+int mmc_cd_gpio_request(struct mmc_host *host, unsigned int gpio,
+			unsigned int irq, unsigned long flags);
+void mmc_cd_gpio_free(struct mmc_host *host);
+
+#endif
-- 
cgit v1.2.3


From 52c506f0bc72530fb786838e7ffd4f158a2e5c3a Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Dec 2011 15:48:43 +0200
Subject: mmc: sdhci-pci: add platform data

Add a means of getting platform data for the SDHCI PCI
devices.  The data is stored against the slot not the
device in order to support multi-slot devices.

The data allows platform-specific setup (such as getting
GPIO numbers from firmware or setting up wl12xx for SDIO)
to be done in platform support files instead of the
sdhci-pci driver.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/Makefile                   |  2 +-
 drivers/mmc/Makefile               |  3 +--
 drivers/mmc/host/Makefile          |  1 +
 drivers/mmc/host/sdhci-pci-data.c  |  5 +++++
 drivers/mmc/host/sdhci-pci.c       | 32 ++++++++++++++++++++++++++++----
 include/linux/mmc/sdhci-pci-data.h | 18 ++++++++++++++++++
 6 files changed, 54 insertions(+), 7 deletions(-)
 create mode 100644 drivers/mmc/host/sdhci-pci-data.c
 create mode 100644 include/linux/mmc/sdhci-pci-data.h

(limited to 'include/linux')

diff --git a/drivers/Makefile b/drivers/Makefile
index 1b3142127bf5..c07be024b962 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -97,7 +97,7 @@ obj-$(CONFIG_EISA)		+= eisa/
 obj-y				+= lguest/
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle/
-obj-$(CONFIG_MMC)		+= mmc/
+obj-y				+= mmc/
 obj-$(CONFIG_MEMSTICK)		+= memstick/
 obj-y				+= leds/
 obj-$(CONFIG_INFINIBAND)	+= infiniband/
diff --git a/drivers/mmc/Makefile b/drivers/mmc/Makefile
index 12eef393e216..400756ec7c49 100644
--- a/drivers/mmc/Makefile
+++ b/drivers/mmc/Makefile
@@ -6,5 +6,4 @@ subdir-ccflags-$(CONFIG_MMC_DEBUG) := -DDEBUG
 
 obj-$(CONFIG_MMC)		+= core/
 obj-$(CONFIG_MMC)		+= card/
-obj-$(CONFIG_MMC)		+= host/
-
+obj-$(subst m,y,$(CONFIG_MMC))	+= host/
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index b4b83f302e32..745f8fce2519 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_MMC_MXC)		+= mxcmmc.o
 obj-$(CONFIG_MMC_MXS)		+= mxs-mmc.o
 obj-$(CONFIG_MMC_SDHCI)		+= sdhci.o
 obj-$(CONFIG_MMC_SDHCI_PCI)	+= sdhci-pci.o
+obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI))	+= sdhci-pci-data.o
 obj-$(CONFIG_MMC_SDHCI_PXAV3)	+= sdhci-pxav3.o
 obj-$(CONFIG_MMC_SDHCI_PXAV2)	+= sdhci-pxav2.o
 obj-$(CONFIG_MMC_SDHCI_S3C)	+= sdhci-s3c.o
diff --git a/drivers/mmc/host/sdhci-pci-data.c b/drivers/mmc/host/sdhci-pci-data.c
new file mode 100644
index 000000000000..a611217769f5
--- /dev/null
+++ b/drivers/mmc/host/sdhci-pci-data.c
@@ -0,0 +1,5 @@
+#include <linux/module.h>
+#include <linux/mmc/sdhci-pci-data.h>
+
+struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev, int slotno);
+EXPORT_SYMBOL_GPL(sdhci_pci_get_data);
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index d2e77fb21b26..4e8f324e2c84 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -25,6 +25,7 @@
 #include <linux/gpio.h>
 #include <linux/sfi.h>
 #include <linux/pm_runtime.h>
+#include <linux/mmc/sdhci-pci-data.h>
 
 #include "sdhci.h"
 
@@ -61,6 +62,7 @@ struct sdhci_pci_fixes {
 struct sdhci_pci_slot {
 	struct sdhci_pci_chip	*chip;
 	struct sdhci_host	*host;
+	struct sdhci_pci_data	*data;
 
 	int			pci_bar;
 	int			rst_n_gpio;
@@ -1188,11 +1190,12 @@ static const struct dev_pm_ops sdhci_pci_pm_ops = {
 \*****************************************************************************/
 
 static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot(
-	struct pci_dev *pdev, struct sdhci_pci_chip *chip, int bar)
+	struct pci_dev *pdev, struct sdhci_pci_chip *chip, int first_bar,
+	int slotno)
 {
 	struct sdhci_pci_slot *slot;
 	struct sdhci_host *host;
-	int ret;
+	int ret, bar = first_bar + slotno;
 
 	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
 		dev_err(&pdev->dev, "BAR %d is not iomem. Aborting.\n", bar);
@@ -1227,6 +1230,20 @@ static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot(
 	slot->pci_bar = bar;
 	slot->rst_n_gpio = -EINVAL;
 
+	/* Retrieve platform data if there is any */
+	if (*sdhci_pci_get_data)
+		slot->data = sdhci_pci_get_data(pdev, slotno);
+
+	if (slot->data) {
+		if (slot->data->setup) {
+			ret = slot->data->setup(slot->data);
+			if (ret) {
+				dev_err(&pdev->dev, "platform setup failed\n");
+				goto free;
+			}
+		}
+	}
+
 	host->hw_name = "PCI";
 	host->ops = &sdhci_pci_ops;
 	host->quirks = chip->quirks;
@@ -1236,7 +1253,7 @@ static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot(
 	ret = pci_request_region(pdev, bar, mmc_hostname(host->mmc));
 	if (ret) {
 		dev_err(&pdev->dev, "cannot request region\n");
-		goto free;
+		goto cleanup;
 	}
 
 	host->ioaddr = pci_ioremap_bar(pdev, bar);
@@ -1270,6 +1287,10 @@ unmap:
 release:
 	pci_release_region(pdev, bar);
 
+cleanup:
+	if (slot->data && slot->data->cleanup)
+		slot->data->cleanup(slot->data);
+
 free:
 	sdhci_free_host(host);
 
@@ -1291,6 +1312,9 @@ static void sdhci_pci_remove_slot(struct sdhci_pci_slot *slot)
 	if (slot->chip->fixes && slot->chip->fixes->remove_slot)
 		slot->chip->fixes->remove_slot(slot, dead);
 
+	if (slot->data && slot->data->cleanup)
+		slot->data->cleanup(slot->data);
+
 	pci_release_region(slot->chip->pdev, slot->pci_bar);
 
 	sdhci_free_host(slot->host);
@@ -1377,7 +1401,7 @@ static int __devinit sdhci_pci_probe(struct pci_dev *pdev,
 	slots = chip->num_slots;	/* Quirk may have changed this */
 
 	for (i = 0; i < slots; i++) {
-		slot = sdhci_pci_probe_slot(pdev, chip, first_bar + i);
+		slot = sdhci_pci_probe_slot(pdev, chip, first_bar, i);
 		if (IS_ERR(slot)) {
 			for (i--; i >= 0; i--)
 				sdhci_pci_remove_slot(chip->slots[i]);
diff --git a/include/linux/mmc/sdhci-pci-data.h b/include/linux/mmc/sdhci-pci-data.h
new file mode 100644
index 000000000000..8959604a13d3
--- /dev/null
+++ b/include/linux/mmc/sdhci-pci-data.h
@@ -0,0 +1,18 @@
+#ifndef LINUX_MMC_SDHCI_PCI_DATA_H
+#define LINUX_MMC_SDHCI_PCI_DATA_H
+
+struct pci_dev;
+
+struct sdhci_pci_data {
+	struct pci_dev	*pdev;
+	int		slotno;
+	int		rst_n_gpio; /* Set to -EINVAL if unused */
+	int		cd_gpio;    /* Set to -EINVAL if unused */
+	int		(*setup)(struct sdhci_pci_data *data);	/* optional; run at slot probe, non-zero aborts the probe */
+	void		(*cleanup)(struct sdhci_pci_data *data);	/* optional; run on slot removal or on probe failure after setup */
+};
+
+extern struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev,
+				int slotno); /* hook may be NULL; sdhci-pci checks before calling */
+
+#endif
-- 
cgit v1.2.3


From e2a0a5829c4069ee4a0f28c7301187ffaba91a46 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 27 Dec 2011 15:48:45 +0200
Subject: mmc: sdhci-pci: remove SDHCI_QUIRK2_OWN_CARD_DETECTION

Even if a driver provides separate card detection, an interrupt
is still needed to abort mmc requests that are in progress.
SDHCI_QUIRK2_OWN_CARD_DETECTION prevents that, so remove it.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci-pci.c | 1 -
 drivers/mmc/host/sdhci.c     | 1 -
 include/linux/mmc/sdhci.h    | 2 --
 3 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 646680a5993a..83a152e9b976 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -212,7 +212,6 @@ static void sdhci_pci_add_own_cd(struct sdhci_pci_slot *slot)
 
 	slot->cd_gpio = gpio;
 	slot->cd_irq = irq;
-	slot->host->quirks2 |= SDHCI_QUIRK2_OWN_CARD_DETECTION;
 
 	return;
 
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index e6c6cd6e95f2..6f1fd02fe01b 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -147,7 +147,6 @@ static void sdhci_set_card_detection(struct sdhci_host *host, bool enable)
 	u32 present, irqs;
 
 	if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) ||
-	    (host->quirks2 & SDHCI_QUIRK2_OWN_CARD_DETECTION) ||
 	    !mmc_card_is_removable(host->mmc))
 		return;
 
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index e4b69353678d..dad7a469f09c 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -90,8 +90,6 @@ struct sdhci_host {
 
 	unsigned int quirks2;	/* More deviations from spec. */
 
-#define SDHCI_QUIRK2_OWN_CARD_DETECTION			(1<<0)
-
 	int irq;		/* Device IRQ */
 	void __iomem *ioaddr;	/* Mapped address */
 
-- 
cgit v1.2.3


From aa9df4fb2adcc73d36fa41e23059519be770aaa5 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@stericsson.com>
Date: Mon, 19 Dec 2011 16:24:19 +0100
Subject: mmc: core: Add option to prevent eMMC sleep command

Host may now use MMC_CAP2_NO_SLEEP_CMD to disable the use
of eMMC sleep/awake command.

This option can be used when your platform has a buggy
kernel crash dump software, which is supposed to store
the dump on the eMMC, but is not able to wake up the eMMC
from sleep state.

In particular, failures have been seen with u-boot; even if
it is fixed there, platforms will be slow to update their
bootloader binaries.

Signed-off-by: Ulf Hansson <ulf.hansson@stericsson.com>
Reviewed-by: Hanumath Prasad <hanumath.prasad@stericsson.com>
Reviewed-by: Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
Acked-by: Subhash Jadavani <subhashj@codeaurora.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  | 6 ++++++
 include/linux/mmc/host.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 22050525be84..be7569f3fb56 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2246,6 +2246,9 @@ int mmc_card_awake(struct mmc_host *host)
 {
 	int err = -ENOSYS;
 
+	if (host->caps2 & MMC_CAP2_NO_SLEEP_CMD)
+		return 0;
+
 	mmc_bus_get(host);
 
 	if (host->bus_ops && !host->bus_dead && host->bus_ops->awake)
@@ -2261,6 +2264,9 @@ int mmc_card_sleep(struct mmc_host *host)
 {
 	int err = -ENOSYS;
 
+	if (host->caps2 & MMC_CAP2_NO_SLEEP_CMD)
+		return 0;
+
 	mmc_bus_get(host);
 
 	if (host->bus_ops && !host->bus_dead && host->bus_ops->sleep)
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 742f0e102e1e..031d865167a2 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -247,6 +247,7 @@ struct mmc_host {
 #define MMC_CAP2_CACHE_CTRL	(1 << 1)	/* Allow cache control */
 #define MMC_CAP2_POWEROFF_NOTIFY (1 << 2)	/* Notify poweroff supported */
 #define MMC_CAP2_NO_MULTI_READ	(1 << 3)	/* Multiblock reads don't work */
+#define MMC_CAP2_NO_SLEEP_CMD	(1 << 4)	/* Don't allow sleep command */
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 	unsigned int        power_notify_type;
-- 
cgit v1.2.3


From 7b21e34fd1c272e3a8c3846168f2f6287a4cd72b Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 12 Jan 2012 15:44:42 +1030
Subject: virtio: harsher barriers for rpmsg.

We were cheating with our barriers; using the smp ones rather than the
real device ones.  That was fine, until rpmsg came along, which is
used to talk to a real device (a non-SMP CPU).

Unfortunately, just putting back the real barriers (reverting
d57ed95d) causes a performance regression on virtio-pci.  In
particular, Amos reports netbench's TCP_RR over virtio_net CPU
utilization increased up to 35% while throughput went down by up to
14%.

By comparison, this branch is in the noise.

Reference: https://lkml.org/lkml/2011/12/11/22

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/lguest/lguest_device.c |  8 +++++---
 drivers/s390/kvm/kvm_virtio.c  |  2 +-
 drivers/virtio/virtio_mmio.c   |  4 ++--
 drivers/virtio/virtio_pci.c    |  4 ++--
 drivers/virtio/virtio_ring.c   | 34 +++++++++++++++++++++-------------
 include/linux/virtio_ring.h    |  1 +
 tools/virtio/linux/virtio.h    |  1 +
 tools/virtio/virtio_test.c     |  3 ++-
 8 files changed, 35 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 595d73197016..6a1d6447b864 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -292,10 +292,12 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
 
 	/*
 	 * OK, tell virtio_ring.c to set up a virtqueue now we know its size
-	 * and we've got a pointer to its pages.
+	 * and we've got a pointer to its pages.  Note that we set weak_barriers
+	 * to 'true': the host is just a(nother) SMP CPU, so we only need
+	 * inter-cpu barriers.
 	 */
-	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN,
-				 vdev, lvq->pages, lg_notify, callback, name);
+	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, vdev,
+				 true, lvq->pages, lg_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 8af868bab20b..7bc1955337ea 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -198,7 +198,7 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev,
 		goto out;
 
 	vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
-				 vdev, (void *) config->address,
+				 vdev, true, (void *) config->address,
 				 kvm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 0269717436af..01d6dc250d5c 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -310,8 +310,8 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 			vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
 
 	/* Create the vring */
-	vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN,
-				 vdev, info->queue, vm_notify, callback, name);
+	vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN, vdev,
+				 true, info->queue, vm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto error_new_virtqueue;
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index baabb7937ec2..688b42d28dad 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -414,8 +414,8 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
 		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
-	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
-				 vdev, info->queue, vp_notify, callback, name);
+	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, vdev,
+				 true, info->queue, vp_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto out_activate_queue;
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index c7a2c208f6ea..50da92046092 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -28,17 +28,20 @@
 #ifdef CONFIG_SMP
 /* Where possible, use SMP barriers which are more lightweight than mandatory
  * barriers, because mandatory barriers control MMIO effects on accesses
- * through relaxed memory I/O windows (which virtio does not use). */
-#define virtio_mb() smp_mb()
-#define virtio_rmb() smp_rmb()
-#define virtio_wmb() smp_wmb()
+ * through relaxed memory I/O windows (which virtio-pci does not use). */
+#define virtio_mb(vq) \
+	do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0)
+#define virtio_rmb(vq) \
+	do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0)
+#define virtio_wmb(vq) /* write barrier: smp_wmb() if weak_barriers, else wmb() */ \
+	do { if ((vq)->weak_barriers) smp_wmb(); else wmb(); } while(0)
 #else
 /* We must force memory ordering even if guest is UP since host could be
  * running on another CPU, but SMP barriers are defined to barrier() in that
  * configuration. So fall back to mandatory barriers instead. */
-#define virtio_mb() mb()
-#define virtio_rmb() rmb()
-#define virtio_wmb() wmb()
+#define virtio_mb(vq) mb()
+#define virtio_rmb(vq) rmb()
+#define virtio_wmb(vq) wmb()
 #endif
 
 #ifdef DEBUG
@@ -77,6 +80,9 @@ struct vring_virtqueue
 	/* Actual memory layout for this queue */
 	struct vring vring;
 
+	/* Can we use weak barriers? */
+	bool weak_barriers;
+
 	/* Other side has made a mess, don't try any more. */
 	bool broken;
 
@@ -245,14 +251,14 @@ void virtqueue_kick(struct virtqueue *_vq)
 	START_USE(vq);
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
-	virtio_wmb();
+	virtio_wmb(vq);
 
 	old = vq->vring.avail->idx;
 	new = vq->vring.avail->idx = old + vq->num_added;
 	vq->num_added = 0;
 
 	/* Need to update avail index before checking if we should notify */
-	virtio_mb();
+	virtio_mb(vq);
 
 	if (vq->event ?
 	    vring_need_event(vring_avail_event(&vq->vring), new, old) :
@@ -314,7 +320,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 	}
 
 	/* Only get used array entries after they have been exposed by host. */
-	virtio_rmb();
+	virtio_rmb(vq);
 
 	i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id;
 	*len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len;
@@ -337,7 +343,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 	 * the read in the next get_buf call. */
 	if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
 		vring_used_event(&vq->vring) = vq->last_used_idx;
-		virtio_mb();
+		virtio_mb(vq);
 	}
 
 	END_USE(vq);
@@ -366,7 +372,7 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
 	 * entry. Always do both to keep code simple. */
 	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
 	vring_used_event(&vq->vring) = vq->last_used_idx;
-	virtio_mb();
+	virtio_mb(vq);
 	if (unlikely(more_used(vq))) {
 		END_USE(vq);
 		return false;
@@ -393,7 +399,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 	/* TODO: tune this threshold */
 	bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
 	vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
-	virtio_mb();
+	virtio_mb(vq);
 	if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
 		END_USE(vq);
 		return false;
@@ -453,6 +459,7 @@ EXPORT_SYMBOL_GPL(vring_interrupt);
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *),
 				      void (*callback)(struct virtqueue *),
@@ -476,6 +483,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
 	vq->vq.vdev = vdev;
 	vq->vq.name = name;
 	vq->notify = notify;
+	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 36be0f6e18a9..e338730c2660 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -168,6 +168,7 @@ struct virtqueue;
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index 669bcdd45805..953db2abf6b9 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -214,6 +214,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq);
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index 74d3331bdaf9..0740284396c1 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -92,7 +92,8 @@ static void vq_info_add(struct vdev_info *dev, int num)
 	assert(r >= 0);
 	memset(info->ring, 0, vring_size(num, 4096));
 	vring_init(&info->vring, num, info->ring, 4096);
-	info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring,
+	info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev,
+				       true, info->ring,
 				       vq_notify, vq_callback, "test");
 	assert(info->vq);
 	info->vq->priv = info;
-- 
cgit v1.2.3


From 5dfc17628d57f9e62043ed0cba03a6e3eb019a78 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 12 Jan 2012 15:44:42 +1030
Subject: virtio: document functions better.

The old documentation is left over from when we used a structure with
strategy pointers.

And move the documentation to the C file as per kernel practice.
Though I disagree...

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 drivers/virtio/virtio_ring.c | 92 +++++++++++++++++++++++++++++++++++++++++++-
 include/linux/virtio.h       | 47 ----------------------
 2 files changed, 91 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 50da92046092..fe50486341a4 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -166,6 +166,23 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 	return head;
 }
 
+/**
+ * virtqueue_add_buf_gfp - expose buffer to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: the description of the buffer(s).
+ * @out_num: the number of sg readable by other side
+ * @in_num: the number of sg which are writable (after readable ones)
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns remaining capacity of queue or a negative error
+ * (e.g. -ENOSPC).  Note that it only really makes sense to treat all
+ * positive return values as "available": indirect buffers mean that
+ * we can put an entire sg[] array inside a single queue entry.
+ */
 int virtqueue_add_buf_gfp(struct virtqueue *_vq,
 			  struct scatterlist sg[],
 			  unsigned int out,
@@ -244,6 +261,16 @@ add_head:
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp);
 
+/**
+ * virtqueue_kick - update after add_buf
+ * @vq: the struct virtqueue
+ *
+ * After one or more virtqueue_add_buf_gfp calls, invoke this to kick
+ * the other side.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
 void virtqueue_kick(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -300,6 +327,22 @@ static inline bool more_used(const struct vring_virtqueue *vq)
 	return vq->last_used_idx != vq->vring.used->idx;
 }
 
+/**
+ * virtqueue_get_buf - get the next used buffer
+ * @vq: the struct virtqueue we're talking about.
+ * @len: the length written into the buffer
+ *
+ * If the other side wrote data into the buffer, @len will be set to the
+ * amount written.  This means you don't need to clear the buffer
+ * beforehand to ensure there's no data leakage in the case of short
+ * writes.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ *
+ * Returns NULL if there are no used buffers, or the "data" token
+ * handed to virtqueue_add_buf_gfp().
+ */
 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -351,6 +394,15 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
 
+/**
+ * virtqueue_disable_cb - disable callbacks
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * Note that this is not necessarily synchronous, hence unreliable and only
+ * useful as an optimization.
+ *
+ * Unlike other operations, this need not be serialized.
+ */
 void virtqueue_disable_cb(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -359,6 +411,17 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
 
+/**
+ * virtqueue_enable_cb - restart callbacks after disable_cb.
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * This re-enables callbacks; it returns "false" if there are pending
+ * buffers in the queue, to detect a possible race between the driver
+ * checking for more work, and enabling callbacks.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
 bool virtqueue_enable_cb(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -383,6 +446,19 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
 
+/**
+ * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * This re-enables callbacks but hints to the other side to delay
+ * interrupts until most of the available buffers have been processed;
+ * it returns "false" if there are many pending buffers in the queue,
+ * to detect a possible race between the driver checking for more work,
+ * and enabling callbacks.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -410,6 +486,14 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
 
+/**
+ * virtqueue_detach_unused_buf - detach first unused buffer
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * Returns NULL or the "data" token handed to virtqueue_add_buf_gfp().
+ * This is not valid on an active queue; it is useful only for device
+ * shutdown.
+ */
 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -538,7 +622,13 @@ void vring_transport_features(struct virtio_device *vdev)
 }
 EXPORT_SYMBOL_GPL(vring_transport_features);
 
-/* return the size of the vring within the virtqueue */
+/**
+ * virtqueue_get_vring_size - return the size of the virtqueue's vring
+ * @vq: the struct virtqueue containing the vring of interest.
+ *
+ * Returns the size of the vring.  This is mainly used for boasting to
+ * userspace.  Unlike other operations, this need not be serialized.
+ */
 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
 {
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 4c069d8bd740..73ad7243128f 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -25,53 +25,6 @@ struct virtqueue {
 	void *priv;
 };
 
-/**
- * operations for virtqueue
- * virtqueue_add_buf: expose buffer to other end
- *	vq: the struct virtqueue we're talking about.
- *	sg: the description of the buffer(s).
- *	out_num: the number of sg readable by other side
- *	in_num: the number of sg which are writable (after readable ones)
- *	data: the token identifying the buffer.
- *	gfp: how to do memory allocations (if necessary).
- *      Returns remaining capacity of queue (sg segments) or a negative error.
- * virtqueue_kick: update after add_buf
- *	vq: the struct virtqueue
- *	After one or more add_buf calls, invoke this to kick the other side.
- * virtqueue_get_buf: get the next used buffer
- *	vq: the struct virtqueue we're talking about.
- *	len: the length written into the buffer
- *	Returns NULL or the "data" token handed to add_buf.
- * virtqueue_disable_cb: disable callbacks
- *	vq: the struct virtqueue we're talking about.
- *	Note that this is not necessarily synchronous, hence unreliable and only
- *	useful as an optimization.
- * virtqueue_enable_cb: restart callbacks after disable_cb.
- *	vq: the struct virtqueue we're talking about.
- *	This re-enables callbacks; it returns "false" if there are pending
- *	buffers in the queue, to detect a possible race between the driver
- *	checking for more work, and enabling callbacks.
- * virtqueue_enable_cb_delayed: restart callbacks after disable_cb.
- *	vq: the struct virtqueue we're talking about.
- *	This re-enables callbacks but hints to the other side to delay
- *	interrupts until most of the available buffers have been processed;
- *	it returns "false" if there are many pending buffers in the queue,
- *	to detect a possible race between the driver checking for more work,
- *	and enabling callbacks.
- * virtqueue_detach_unused_buf: detach first unused buffer
- * 	vq: the struct virtqueue we're talking about.
- * 	Returns NULL or the "data" token handed to add_buf
- * virtqueue_get_vring_size: return the size of the virtqueue's vring
- *	vq: the struct virtqueue containing the vring of interest.
- *	Returns the size of the vring.
- *
- * Locking rules are straightforward: the driver is responsible for
- * locking.  No two operations may be invoked simultaneously, with the exception
- * of virtqueue_disable_cb.
- *
- * All operations can be called in any context.
- */
-
 int virtqueue_add_buf_gfp(struct virtqueue *vq,
 			  struct scatterlist sg[],
 			  unsigned int out_num,
-- 
cgit v1.2.3


From f96fde41f7f9af6cf20f6a1919f5d9670f84d574 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 12 Jan 2012 15:44:42 +1030
Subject: virtio: rename virtqueue_add_buf_gfp to virtqueue_add_buf

Remove wrapper functions. This makes the allocation type explicit in
all callers; I used GFP_KERNEL where it seemed obvious, left it at
GFP_ATOMIC otherwise.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 drivers/block/virtio_blk.c          |  2 +-
 drivers/char/hw_random/virtio-rng.c |  2 +-
 drivers/char/virtio_console.c       |  6 +++---
 drivers/net/virtio_net.c            | 12 ++++++------
 drivers/virtio/virtio_balloon.c     |  7 ++++---
 drivers/virtio/virtio_ring.c        | 22 +++++++++++-----------
 include/linux/virtio.h              | 21 ++++++---------------
 net/9p/trans_virtio.c               |  6 ++++--
 tools/virtio/linux/virtio.h         | 21 ++++++---------------
 tools/virtio/virtio_test.c          |  3 ++-
 10 files changed, 44 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4d0b70adf5f7..a345e40e1bca 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -172,7 +172,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
 		}
 	}
 
-	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) {
+	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) {
 		mempool_free(vbr, vblk->pool);
 		return false;
 	}
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index fd699ccecf5b..723725bbb96b 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -47,7 +47,7 @@ static void register_buffer(u8 *buf, size_t size)
 	sg_init_one(&sg, buf, size);
 
 	/* There should always be room for one buffer. */
-	if (virtqueue_add_buf(vq, &sg, 0, 1, buf) < 0)
+	if (virtqueue_add_buf(vq, &sg, 0, 1, buf, GFP_KERNEL) < 0)
 		BUG();
 
 	virtqueue_kick(vq);
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 8e3c46d67cb3..d1ae1492ee78 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -392,7 +392,7 @@ static int add_inbuf(struct virtqueue *vq, struct port_buffer *buf)
 
 	sg_init_one(sg, buf->buf, buf->size);
 
-	ret = virtqueue_add_buf(vq, sg, 0, 1, buf);
+	ret = virtqueue_add_buf(vq, sg, 0, 1, buf, GFP_ATOMIC);
 	virtqueue_kick(vq);
 	return ret;
 }
@@ -457,7 +457,7 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id,
 	vq = portdev->c_ovq;
 
 	sg_init_one(sg, &cpkt, sizeof(cpkt));
-	if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt) >= 0) {
+	if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt, GFP_ATOMIC) >= 0) {
 		virtqueue_kick(vq);
 		while (!virtqueue_get_buf(vq, &len))
 			cpu_relax();
@@ -506,7 +506,7 @@ static ssize_t send_buf(struct port *port, void *in_buf, size_t in_count,
 	reclaim_consumed_buffers(port);
 
 	sg_init_one(sg, in_buf, in_count);
-	ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf);
+	ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf, GFP_ATOMIC);
 
 	/* Tell Host to go! */
 	virtqueue_kick(out_vq);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 76fe14efb2b5..6345a52194f9 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -370,7 +370,7 @@ static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp)
 
 	skb_to_sgvec(skb, vi->rx_sg + 1, 0, skb->len);
 
-	err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 2, skb, gfp);
+	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 2, skb, gfp);
 	if (err < 0)
 		dev_kfree_skb(skb);
 
@@ -415,8 +415,8 @@ static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp)
 
 	/* chain first in list head */
 	first->private = (unsigned long)list;
-	err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2,
-				    first, gfp);
+	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2,
+				first, gfp);
 	if (err < 0)
 		give_pages(vi, first);
 
@@ -434,7 +434,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp)
 
 	sg_init_one(vi->rx_sg, page_address(page), PAGE_SIZE);
 
-	err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 1, page, gfp);
+	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 1, page, gfp);
 	if (err < 0)
 		give_pages(vi, page);
 
@@ -609,7 +609,7 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 
 	hdr->num_sg = skb_to_sgvec(skb, vi->tx_sg + 1, 0, skb->len) + 1;
 	return virtqueue_add_buf(vi->svq, vi->tx_sg, hdr->num_sg,
-					0, skb);
+				 0, skb, GFP_ATOMIC);
 }
 
 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -767,7 +767,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 		sg_set_buf(&sg[i + 1], sg_virt(s), s->length);
 	sg_set_buf(&sg[out + in - 1], &status, sizeof(status));
 
-	BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi) < 0);
+	BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi, GFP_ATOMIC) < 0);
 
 	virtqueue_kick(vi->cvq);
 
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index f64ff185b8b5..0a6425aadf95 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -88,7 +88,7 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
 	init_completion(&vb->acked);
 
 	/* We should always be able to add one buffer to an empty queue. */
-	if (virtqueue_add_buf(vq, &sg, 1, 0, vb) < 0)
+	if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
 		BUG();
 	virtqueue_kick(vq);
 
@@ -220,7 +220,7 @@ static void stats_handle_request(struct virtio_balloon *vb)
 
 	vq = vb->stats_vq;
 	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
-	if (virtqueue_add_buf(vq, &sg, 1, 0, vb) < 0)
+	if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
 		BUG();
 	virtqueue_kick(vq);
 }
@@ -313,7 +313,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
 		 * use it to signal us later.
 		 */
 		sg_init_one(&sg, vb->stats, sizeof vb->stats);
-		if (virtqueue_add_buf(vb->stats_vq, &sg, 1, 0, vb) < 0)
+		if (virtqueue_add_buf(vb->stats_vq, &sg, 1, 0, vb, GFP_KERNEL)
+		    < 0)
 			BUG();
 		virtqueue_kick(vb->stats_vq);
 	}
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index fe50486341a4..6ea92a6d1134 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -167,7 +167,7 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 }
 
 /**
- * virtqueue_add_buf_gfp - expose buffer to other end
+ * virtqueue_add_buf - expose buffer to other end
  * @vq: the struct virtqueue we're talking about.
  * @sg: the description of the buffer(s).
  * @out_num: the number of sg readable by other side
@@ -183,12 +183,12 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
  * positive return values as "available": indirect buffers mean that
  * we can put an entire sg[] array inside a single queue entry.
  */
-int virtqueue_add_buf_gfp(struct virtqueue *_vq,
-			  struct scatterlist sg[],
-			  unsigned int out,
-			  unsigned int in,
-			  void *data,
-			  gfp_t gfp)
+int virtqueue_add_buf(struct virtqueue *_vq,
+		      struct scatterlist sg[],
+		      unsigned int out,
+		      unsigned int in,
+		      void *data,
+		      gfp_t gfp)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	unsigned int i, avail, uninitialized_var(prev);
@@ -259,13 +259,13 @@ add_head:
 
 	return vq->num_free;
 }
-EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp);
+EXPORT_SYMBOL_GPL(virtqueue_add_buf);
 
 /**
  * virtqueue_kick - update after add_buf
  * @vq: the struct virtqueue
  *
- * After one or more virtqueue_add_buf_gfp calls, invoke this to kick
+ * After one or more virtqueue_add_buf calls, invoke this to kick
  * the other side.
  *
  * Caller must ensure we don't call this with other virtqueue
@@ -341,7 +341,7 @@ static inline bool more_used(const struct vring_virtqueue *vq)
  * operations at the same time (except where noted).
  *
  * Returns NULL if there are no used buffers, or the "data" token
- * handed to virtqueue_add_buf_gfp().
+ * handed to virtqueue_add_buf().
  */
 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 {
@@ -490,7 +490,7 @@ EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
  * virtqueue_detach_unused_buf - detach first unused buffer
  * @vq: the struct virtqueue we're talking about.
  *
- * Returns NULL or the "data" token handed to virtqueue_add_buf_gfp().
+ * Returns NULL or the "data" token handed to virtqueue_add_buf().
  * This is not valid on an active queue; it is useful only for device
  * shutdown.
  */
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 73ad7243128f..ec1706e7df50 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -25,21 +25,12 @@ struct virtqueue {
 	void *priv;
 };
 
-int virtqueue_add_buf_gfp(struct virtqueue *vq,
-			  struct scatterlist sg[],
-			  unsigned int out_num,
-			  unsigned int in_num,
-			  void *data,
-			  gfp_t gfp);
-
-static inline int virtqueue_add_buf(struct virtqueue *vq,
-				    struct scatterlist sg[],
-				    unsigned int out_num,
-				    unsigned int in_num,
-				    void *data)
-{
-	return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC);
-}
+int virtqueue_add_buf(struct virtqueue *vq,
+		      struct scatterlist sg[],
+		      unsigned int out_num,
+		      unsigned int in_num,
+		      void *data,
+		      gfp_t gfp);
 
 void virtqueue_kick(struct virtqueue *vq);
 
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 330421e54713..3d432068f627 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -272,7 +272,8 @@ req_retry:
 	in = pack_sg_list(chan->sg, out,
 			  VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
 
-	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
+	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
+				GFP_ATOMIC);
 	if (err < 0) {
 		if (err == -ENOSPC) {
 			chan->ring_bufs_avail = 0;
@@ -414,7 +415,8 @@ req_retry_pinned:
 		in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
 				     in_pages, in_nr_pages, uidata, inlen);
 
-	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
+	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
+				GFP_ATOMIC);
 	if (err < 0) {
 		if (err == -ENOSPC) {
 			chan->ring_bufs_avail = 0;
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index 953db2abf6b9..b4fbc91c41b4 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -186,21 +186,12 @@ struct virtqueue {
 #endif
 
 /* Interfaces exported by virtio_ring. */
-int virtqueue_add_buf_gfp(struct virtqueue *vq,
-			  struct scatterlist sg[],
-			  unsigned int out_num,
-			  unsigned int in_num,
-			  void *data,
-			  gfp_t gfp);
-
-static inline int virtqueue_add_buf(struct virtqueue *vq,
-				    struct scatterlist sg[],
-				    unsigned int out_num,
-				    unsigned int in_num,
-				    void *data)
-{
-	return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC);
-}
+int virtqueue_add_buf(struct virtqueue *vq,
+		      struct scatterlist sg[],
+		      unsigned int out_num,
+		      unsigned int in_num,
+		      void *data,
+		      gfp_t gfp);
 
 void virtqueue_kick(struct virtqueue *vq);
 
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index 0740284396c1..6bf95f995364 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -161,7 +161,8 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs)
 			if (started < bufs) {
 				sg_init_one(&sl, dev->buf, dev->buf_size);
 				r = virtqueue_add_buf(vq->vq, &sl, 1, 0,
-						      dev->buf + started);
+						      dev->buf + started,
+						      GFP_ATOMIC);
 				if (likely(r >= 0)) {
 					++started;
 					virtqueue_kick(vq->vq);
-- 
cgit v1.2.3


From 41f0377f73039ca6fe97a469d1941a89cd9757f1 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 12 Jan 2012 15:44:43 +1030
Subject: virtio: support unlocked queue kick

Based on patch by Christoph for virtio_blk speedup:

	Split virtqueue_kick to be able to do the actual notification
	outside the lock protecting the virtqueue.  This patch was
	originally done by Stefan Hajnoczi, but I can't find the
	original one anymore and had to recreate it from memory.
	Pointers to the original or corrections for the commit message
	are welcome.

Stefan's patch was here:

	https://github.com/stefanha/linux/commit/a6d06644e3a58e57a774e77d7dc34c4a5a2e7496
	http://www.spinics.net/lists/linux-virtualization/msg14616.html

Third time's the charm!

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_ring.c | 60 +++++++++++++++++++++++++++++++++++---------
 include/linux/virtio.h       |  4 +++
 2 files changed, 52 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 6ea92a6d1134..c56bbe799241 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -262,19 +262,22 @@ add_head:
 EXPORT_SYMBOL_GPL(virtqueue_add_buf);
 
 /**
- * virtqueue_kick - update after add_buf
+ * virtqueue_kick_prepare - first half of split virtqueue_kick call.
  * @vq: the struct virtqueue
  *
- * After one or more virtqueue_add_buf calls, invoke this to kick
- * the other side.
+ * Instead of virtqueue_kick(), you can do:
+ *	if (virtqueue_kick_prepare(vq))
+ *		virtqueue_notify(vq);
  *
- * Caller must ensure we don't call this with other virtqueue
- * operations at the same time (except where noted).
+ * This is sometimes useful because the virtqueue_kick_prepare() needs
+ * to be serialized, but the actual virtqueue_notify() call does not.
  */
-void virtqueue_kick(struct virtqueue *_vq)
+bool virtqueue_kick_prepare(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	u16 new, old;
+	bool needs_kick;
+
 	START_USE(vq);
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
@@ -287,13 +290,46 @@ void virtqueue_kick(struct virtqueue *_vq)
 	/* Need to update avail index before checking if we should notify */
 	virtio_mb(vq);
 
-	if (vq->event ?
-	    vring_need_event(vring_avail_event(&vq->vring), new, old) :
-	    !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
-		/* Prod other side to tell it about changes. */
-		vq->notify(&vq->vq);
-
+	if (vq->event) {
+		needs_kick = vring_need_event(vring_avail_event(&vq->vring),
+					      new, old);
+	} else {
+		needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY);
+	}
 	END_USE(vq);
+	return needs_kick;
+}
+EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
+
+/**
+ * virtqueue_notify - second half of split virtqueue_kick call.
+ * @vq: the struct virtqueue
+ *
+ * This does not need to be serialized.
+ */
+void virtqueue_notify(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	/* Prod other side to tell it about changes. */
+	vq->notify(_vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_notify);
+
+/**
+ * virtqueue_kick - update after add_buf
+ * @vq: the struct virtqueue
+ *
+ * After one or more virtqueue_add_buf calls, invoke this to kick
+ * the other side.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
+void virtqueue_kick(struct virtqueue *vq)
+{
+	if (virtqueue_kick_prepare(vq))
+		virtqueue_notify(vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick);
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index ec1706e7df50..31fe3a62874b 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -34,6 +34,10 @@ int virtqueue_add_buf(struct virtqueue *vq,
 
 void virtqueue_kick(struct virtqueue *vq);
 
+bool virtqueue_kick_prepare(struct virtqueue *vq);
+
+void virtqueue_notify(struct virtqueue *vq);
+
 void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
 
 void virtqueue_disable_cb(struct virtqueue *vq);
-- 
cgit v1.2.3


From f0fe6f11503fa9880867554350ac5d3092c47251 Mon Sep 17 00:00:00 2001
From: Amit Shah <amit.shah@redhat.com>
Date: Thu, 22 Dec 2011 16:58:26 +0530
Subject: virtio: pci: add PM notification handlers for restore, freeze, thaw,
 poweroff

Handle thaw, restore and freeze notifications from the PM core.  Expose
these to individual virtio drivers that can quiesce and resume vq
operations.  For drivers not implementing the thaw() method, use the
restore method instead.

These functions also save device-specific data so that the device can be
put in pre-suspend state after resume, and disable and enable the PCI
device in the freeze and resume functions, respectively.

Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_pci.c | 94 ++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/virtio.h      |  5 +++
 2 files changed, 97 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 7f9ac1af7cfd..635e1efb3792 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -55,6 +55,10 @@ struct virtio_pci_device
 	unsigned msix_vectors;
 	/* Vectors allocated, excluding per-vq vectors if any */
 	unsigned msix_used_vectors;
+
+	/* Status saved during hibernate/restore */
+	u8 saved_status;
+
 	/* Whether we have vector per vq */
 	bool per_vq_vectors;
 };
@@ -734,9 +738,95 @@ static int virtio_pci_resume(struct device *dev)
 	return 0;
 }
 
+static int virtio_pci_freeze(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_driver *drv;
+	int ret;
+
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	ret = 0;
+	vp_dev->saved_status = vp_get_status(&vp_dev->vdev);
+	if (drv && drv->freeze)
+		ret = drv->freeze(&vp_dev->vdev);
+
+	if (!ret)
+		pci_disable_device(pci_dev);
+	return ret;
+}
+
+static int restore_common(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	int ret;
+
+	ret = pci_enable_device(pci_dev);
+	if (ret)
+		return ret;
+	pci_set_master(pci_dev);
+	vp_finalize_features(&vp_dev->vdev);
+
+	return ret;
+}
+
+static int virtio_pci_thaw(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_driver *drv;
+	int ret;
+
+	ret = restore_common(dev);
+	if (ret)
+		return ret;
+
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	if (drv && drv->thaw)
+		ret = drv->thaw(&vp_dev->vdev);
+	else if (drv && drv->restore)
+		ret = drv->restore(&vp_dev->vdev);
+
+	/* Finally, tell the device we're all set */
+	if (!ret)
+		vp_set_status(&vp_dev->vdev, vp_dev->saved_status);
+
+	return ret;
+}
+
+static int virtio_pci_restore(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_driver *drv;
+	int ret;
+
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	ret = restore_common(dev);
+	if (!ret && drv && drv->restore)
+		ret = drv->restore(&vp_dev->vdev);
+
+	/* Finally, tell the device we're all set */
+	if (!ret)
+		vp_set_status(&vp_dev->vdev, vp_dev->saved_status);
+
+	return ret;
+}
+
 static const struct dev_pm_ops virtio_pci_pm_ops = {
-	.suspend = virtio_pci_suspend,
-	.resume  = virtio_pci_resume,
+	.suspend	= virtio_pci_suspend,
+	.resume		= virtio_pci_resume,
+	.freeze		= virtio_pci_freeze,
+	.thaw		= virtio_pci_thaw,
+	.restore	= virtio_pci_restore,
+	.poweroff	= virtio_pci_suspend,
 };
 #endif
 
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 31fe3a62874b..d0018d27c281 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -94,6 +94,11 @@ struct virtio_driver {
 	int (*probe)(struct virtio_device *dev);
 	void (*remove)(struct virtio_device *dev);
 	void (*config_changed)(struct virtio_device *dev);
+#ifdef CONFIG_PM
+	int (*freeze)(struct virtio_device *dev);
+	int (*thaw)(struct virtio_device *dev);
+	int (*restore)(struct virtio_device *dev);
+#endif
 };
 
 int register_virtio_driver(struct virtio_driver *drv);
-- 
cgit v1.2.3


From fd83240a60ecc59849420df3393e9e6d35c77683 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 12 Jan 2012 09:17:30 +0100
Subject: blockdev: convert some macros to static inlines

We prefer to program in C rather than preprocessor and it fixes this
warning when CONFIG_BLK_DEV_INTEGRITY is not set:

drivers/md/dm-table.c: In function 'dm_table_set_integrity':
drivers/md/dm-table.c:1285:3: warning: statement with no effect [-Wunused-value]

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 77 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 64 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index adc34133a56a..5cfb9b22627f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1298,19 +1298,70 @@ queue_max_integrity_segments(struct request_queue *q)
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
-#define blk_integrity_rq(rq)			(0)
-#define blk_rq_count_integrity_sg(a, b)		(0)
-#define blk_rq_map_integrity_sg(a, b, c)	(0)
-#define bdev_get_integrity(a)			(0)
-#define blk_get_integrity(a)			(0)
-#define blk_integrity_compare(a, b)		(0)
-#define blk_integrity_register(a, b)		(0)
-#define blk_integrity_unregister(a)		do { } while (0)
-#define blk_queue_max_integrity_segments(a, b)	do { } while (0)
-#define queue_max_integrity_segments(a)		(0)
-#define blk_integrity_merge_rq(a, b, c)		(0)
-#define blk_integrity_merge_bio(a, b, c)	(0)
-#define blk_integrity_is_initialized(a)		(0)
+struct bio;
+struct block_device;
+struct gendisk;
+struct blk_integrity;
+
+static inline int blk_integrity_rq(struct request *rq)
+{
+	return 0;
+}
+static inline int blk_rq_count_integrity_sg(struct request_queue *q,
+					    struct bio *b)
+{
+	return 0;
+}
+static inline int blk_rq_map_integrity_sg(struct request_queue *q,
+					  struct bio *b,
+					  struct scatterlist *s)
+{
+	return 0;
+}
+static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
+{
+	return 0;
+}
+static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
+{
+	return NULL;
+}
+static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b)
+{
+	return 0;
+}
+static inline int blk_integrity_register(struct gendisk *d,
+					 struct blk_integrity *b)
+{
+	return 0;
+}
+static inline void blk_integrity_unregister(struct gendisk *d)
+{
+}
+static inline void blk_queue_max_integrity_segments(struct request_queue *q,
+						    unsigned int segs)
+{
+}
+static inline unsigned short queue_max_integrity_segments(struct request_queue *q)
+{
+	return 0;
+}
+static inline int blk_integrity_merge_rq(struct request_queue *rq,
+					 struct request *r1,
+					 struct request *r2)
+{
+	return 0;
+}
+static inline int blk_integrity_merge_bio(struct request_queue *rq,
+					  struct request *r,
+					  struct bio *b)
+{
+	return 0;
+}
+static inline bool blk_integrity_is_initialized(struct gendisk *g)
+{
+	return 0;
+}
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
-- 
cgit v1.2.3


From 46fe44ce8777f087aa8ad4a2605fdcfb9c2d63af Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 16 Nov 2011 15:03:59 +0100
Subject: quota: Pass information that quota is stored in system file to
 userspace

Quota tools need to know whether quota is stored in a system file or in
classical aquota.{user|group} files. So pass this information as a flag
in GETINFO quotactl.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c      | 8 +++++---
 include/linux/quota.h | 6 +++++-
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 5ec59b20cf76..46741970371b 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2125,6 +2125,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 		mutex_unlock(&dqopt->dqio_mutex);
 		goto out_file_init;
 	}
+	if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
+		dqopt->info[type].dqi_flags |= DQF_SYS_FILE;
 	mutex_unlock(&dqopt->dqio_mutex);
 	spin_lock(&dq_state_lock);
 	dqopt->flags |= dquot_state_flag(flags, type);
@@ -2464,7 +2466,7 @@ int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 	spin_lock(&dq_data_lock);
 	ii->dqi_bgrace = mi->dqi_bgrace;
 	ii->dqi_igrace = mi->dqi_igrace;
-	ii->dqi_flags = mi->dqi_flags & DQF_MASK;
+	ii->dqi_flags = mi->dqi_flags & DQF_GETINFO_MASK;
 	ii->dqi_valid = IIF_ALL;
 	spin_unlock(&dq_data_lock);
 	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
@@ -2490,8 +2492,8 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 	if (ii->dqi_valid & IIF_IGRACE)
 		mi->dqi_igrace = ii->dqi_igrace;
 	if (ii->dqi_valid & IIF_FLAGS)
-		mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) |
-				(ii->dqi_flags & DQF_MASK);
+		mi->dqi_flags = (mi->dqi_flags & ~DQF_SETINFO_MASK) |
+				(ii->dqi_flags & DQF_SETINFO_MASK);
 	spin_unlock(&dq_data_lock);
 	mark_info_dirty(sb, type);
 	/* Force write to disk */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index cb7855699037..c09fa042b5ea 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -230,7 +230,11 @@ struct mem_dqinfo {
 struct super_block;
 
 #define DQF_MASK 0xffff		/* Mask for format specific flags */
-#define DQF_INFO_DIRTY_B 16
+#define DQF_GETINFO_MASK 0x1ffff	/* Mask for flags passed to userspace */
+#define DQF_SETINFO_MASK 0xffff		/* Mask for flags modifiable from userspace */
+#define DQF_SYS_FILE_B		16
+#define DQF_SYS_FILE (1 << DQF_SYS_FILE_B)	/* Quota file stored as system file */
+#define DQF_INFO_DIRTY_B	31
 #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B)	/* Is info dirty? */
 
 extern void mark_info_dirty(struct super_block *sb, int type);
-- 
cgit v1.2.3


From 46f72b349290d2bd7aecea38f02609d814332df6 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Tue, 10 Jan 2012 09:04:37 -0800
Subject: vfs: export symbol d_find_any_alias()

Ceph needs this.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sage Weil <sage@newdream.net>
---
 fs/dcache.c            | 11 +++++++++--
 include/linux/dcache.h |  1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/dcache.c b/fs/dcache.c
index 89509b5a090e..ba960051dfb7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1471,7 +1471,14 @@ static struct dentry * __d_find_any_alias(struct inode *inode)
 	return alias;
 }
 
-static struct dentry * d_find_any_alias(struct inode *inode)
+/**
+ * d_find_any_alias - find any alias for a given inode
+ * @inode: inode to find an alias for
+ *
+ * If any aliases exist for the given inode, take and return a
+ * reference for one of them.  If no aliases exist, return %NULL.
+ */
+struct dentry *d_find_any_alias(struct inode *inode)
 {
 	struct dentry *de;
 
@@ -1480,7 +1487,7 @@ static struct dentry * d_find_any_alias(struct inode *inode)
 	spin_unlock(&inode->i_lock);
 	return de;
 }
-
+EXPORT_SYMBOL(d_find_any_alias);
 
 /**
  * d_obtain_alias - find or allocate a dentry for a given inode
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index ed9f74f6c519..3871ba743b4c 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -241,6 +241,7 @@ extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
 extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
 extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
+extern struct dentry *d_find_any_alias(struct inode *inode);
 extern struct dentry * d_obtain_alias(struct inode *);
 extern void shrink_dcache_sb(struct super_block *);
 extern void shrink_dcache_parent(struct dentry *);
-- 
cgit v1.2.3


From a4924c71aa43d4f8a3f342b1f71788349472e684 Mon Sep 17 00:00:00 2001
From: Girish K S <girish.shivananjappa@linaro.org>
Date: Wed, 11 Jan 2012 14:04:52 -0500
Subject: mmc: core: HS200 mode support for eMMC 4.5

This patch adds the support of the HS200 bus speed for eMMC 4.5 devices.
The eMMC 4.5 devices have support for 200MHz bus speed. The function
prototype of the tuning function is modified to handle the tuning
command number which is different in sd and mmc case.

Signed-off-by: Girish K S <girish.shivananjappa@linaro.org>
Signed-off-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/bus.c     |   3 +-
 drivers/mmc/core/debugfs.c |   3 +
 drivers/mmc/core/mmc.c     | 162 ++++++++++++++++++++++++++++++++++++++++++---
 drivers/mmc/core/sd.c      |   3 +-
 drivers/mmc/core/sdio.c    |   4 +-
 include/linux/mmc/card.h   |   3 +
 include/linux/mmc/host.h   |  11 ++-
 include/linux/mmc/mmc.h    |  66 +++++++++++++++++-
 8 files changed, 241 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index f8a228a61fd4..5d011a39dfff 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -303,10 +303,11 @@ int mmc_add_card(struct mmc_card *card)
 			mmc_card_ddr_mode(card) ? "DDR " : "",
 			type);
 	} else {
-		printk(KERN_INFO "%s: new %s%s%s card at address %04x\n",
+		pr_info("%s: new %s%s%s%s card at address %04x\n",
 			mmc_hostname(card->host),
 			mmc_card_uhs(card) ? "ultra high speed " :
 			(mmc_card_highspeed(card) ? "high speed " : ""),
+			(mmc_card_hs200(card) ? "HS200 " : ""),
 			mmc_card_ddr_mode(card) ? "DDR " : "",
 			type, card->rca);
 	}
diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 027615d3bf3e..9ab5b17d488a 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -135,6 +135,9 @@ static int mmc_ios_show(struct seq_file *s, void *data)
 	case MMC_TIMING_UHS_DDR50:
 		str = "sd uhs DDR50";
 		break;
+	case MMC_TIMING_MMC_HS200:
+		str = "mmc high-speed SDR200";
+		break;
 	default:
 		str = "invalid";
 		break;
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 67f346e0d105..59b9ba52e66a 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -286,6 +286,27 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 	}
 	card->ext_csd.raw_card_type = ext_csd[EXT_CSD_CARD_TYPE];
 	switch (ext_csd[EXT_CSD_CARD_TYPE] & EXT_CSD_CARD_TYPE_MASK) {
+	case EXT_CSD_CARD_TYPE_SDR_ALL:
+	case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_8V:
+	case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_2V:
+	case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_52:
+		card->ext_csd.hs_max_dtr = 200000000;
+		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_200;
+		break;
+	case EXT_CSD_CARD_TYPE_SDR_1_2V_ALL:
+	case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_8V:
+	case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_2V:
+	case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_52:
+		card->ext_csd.hs_max_dtr = 200000000;
+		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_1_2V;
+		break;
+	case EXT_CSD_CARD_TYPE_SDR_1_8V_ALL:
+	case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_8V:
+	case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_2V:
+	case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_52:
+		card->ext_csd.hs_max_dtr = 200000000;
+		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_1_8V;
+		break;
 	case EXT_CSD_CARD_TYPE_DDR_52 | EXT_CSD_CARD_TYPE_52 |
 	     EXT_CSD_CARD_TYPE_26:
 		card->ext_csd.hs_max_dtr = 52000000;
@@ -699,6 +720,79 @@ static int mmc_select_powerclass(struct mmc_card *card,
 	return err;
 }
 
+/*
+ * Selects the desired buswidth and switch to the HS200 mode
+ * if bus width set without error
+ */
+static int mmc_select_hs200(struct mmc_card *card)
+{
+	int idx, err = 0;
+	struct mmc_host *host;
+	static unsigned ext_csd_bits[] = {
+		EXT_CSD_BUS_WIDTH_4,
+		EXT_CSD_BUS_WIDTH_8,
+	};
+	static unsigned bus_widths[] = {
+		MMC_BUS_WIDTH_4,
+		MMC_BUS_WIDTH_8,
+	};
+
+	BUG_ON(!card);
+
+	host = card->host;
+
+	if (card->ext_csd.card_type & EXT_CSD_CARD_TYPE_SDR_1_2V &&
+	    host->caps2 & MMC_CAP2_HS200_1_2V_SDR)
+		if (mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120, 0))
+			err = mmc_set_signal_voltage(host,
+						     MMC_SIGNAL_VOLTAGE_180, 0);
+
+	/* If fails try again during next card power cycle */
+	if (err)
+		goto err;
+
+	idx = (host->caps & MMC_CAP_8_BIT_DATA) ? 1 : 0;
+
+	/*
+	 * Unlike SD, MMC cards dont have a configuration register to notify
+	 * supported bus width. So bus test command should be run to identify
+	 * the supported bus width or compare the ext csd values of current
+	 * bus width and ext csd values of 1 bit mode read earlier.
+	 */
+	for (; idx >= 0; idx--) {
+
+		/*
+		 * Host is capable of 8bit transfer, then switch
+		 * the device to work in 8bit transfer mode. If the
+		 * mmc switch command returns error then switch to
+		 * 4bit transfer mode. On success set the corresponding
+		 * bus width on the host.
+		 */
+		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+				 EXT_CSD_BUS_WIDTH,
+				 ext_csd_bits[idx],
+				 card->ext_csd.generic_cmd6_time);
+		if (err)
+			continue;
+
+		mmc_set_bus_width(card->host, bus_widths[idx]);
+
+		if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
+			err = mmc_compare_ext_csds(card, bus_widths[idx]);
+		else
+			err = mmc_bus_test(card, bus_widths[idx]);
+		if (!err)
+			break;
+	}
+
+	/* switch to HS200 mode if bus width set successfully */
+	if (!err)
+		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+				 EXT_CSD_HS_TIMING, 2, 0);
+err:
+	return err;
+}
+
 /*
  * Handle the detection and initialisation of a card.
  *
@@ -905,11 +999,15 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	/*
 	 * Activate high speed (if supported)
 	 */
-	if ((card->ext_csd.hs_max_dtr != 0) &&
-		(host->caps & MMC_CAP_MMC_HIGHSPEED)) {
-		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-				 EXT_CSD_HS_TIMING, 1,
-				 card->ext_csd.generic_cmd6_time);
+	if (card->ext_csd.hs_max_dtr != 0) {
+		err = 0;
+		if (card->ext_csd.hs_max_dtr > 52000000 &&
+		    host->caps2 & MMC_CAP2_HS200)
+			err = mmc_select_hs200(card);
+		else if	(host->caps & MMC_CAP_MMC_HIGHSPEED)
+			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+					 EXT_CSD_HS_TIMING, 1, 0);
+
 		if (err && err != -EBADMSG)
 			goto free_card;
 
@@ -918,8 +1016,15 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 			       mmc_hostname(card->host));
 			err = 0;
 		} else {
-			mmc_card_set_highspeed(card);
-			mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+			if (card->ext_csd.hs_max_dtr > 52000000 &&
+			    host->caps2 & MMC_CAP2_HS200) {
+				mmc_card_set_hs200(card);
+				mmc_set_timing(card->host,
+					       MMC_TIMING_MMC_HS200);
+			} else {
+				mmc_card_set_highspeed(card);
+				mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+			}
 		}
 	}
 
@@ -944,7 +1049,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	 */
 	max_dtr = (unsigned int)-1;
 
-	if (mmc_card_highspeed(card)) {
+	if (mmc_card_highspeed(card) || mmc_card_hs200(card)) {
 		if (max_dtr > card->ext_csd.hs_max_dtr)
 			max_dtr = card->ext_csd.hs_max_dtr;
 	} else if (max_dtr > card->csd.max_dtr) {
@@ -969,10 +1074,49 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 				ddr = MMC_1_2V_DDR_MODE;
 	}
 
+	/*
+	 * Indicate HS200 SDR mode (if supported).
+	 */
+	if (mmc_card_hs200(card)) {
+		u32 ext_csd_bits;
+		u32 bus_width = card->host->ios.bus_width;
+
+		/*
+		 * For devices supporting HS200 mode, the bus width has
+		 * to be set before executing the tuning function. If
+		 * set after tuning, then the device will respond with CRC
+		 * errors for responses on CMD line. So for HS200 the
+		 * sequence will be
+		 * 1. set bus width 4bit / 8 bit (1 bit not supported)
+		 * 2. switch to HS200 mode
+		 * 3. set the clock to > 52Mhz <=200MHz and
+		 * 4. execute tuning for HS200
+		 */
+		if ((host->caps2 & MMC_CAP2_HS200) &&
+		    card->host->ops->execute_tuning)
+			err = card->host->ops->execute_tuning(card->host,
+				MMC_SEND_TUNING_BLOCK_HS200);
+		if (err) {
+			pr_warning("%s: tuning execution failed\n",
+				   mmc_hostname(card->host));
+			goto err;
+		}
+
+		ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
+				EXT_CSD_BUS_WIDTH_8 : EXT_CSD_BUS_WIDTH_4;
+		err = mmc_select_powerclass(card, ext_csd_bits, ext_csd);
+		if (err) {
+			pr_err("%s: power class selection to bus width %d failed\n",
+				mmc_hostname(card->host), 1 << bus_width);
+			goto err;
+		}
+	}
+
 	/*
 	 * Activate wide bus and DDR (if supported).
 	 */
-	if ((card->csd.mmca_vsn >= CSD_SPEC_VER_4) &&
+	if (!mmc_card_hs200(card) &&
+	    (card->csd.mmca_vsn >= CSD_SPEC_VER_3) &&
 	    (host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) {
 		static unsigned ext_csd_bits[][2] = {
 			{ EXT_CSD_BUS_WIDTH_8, EXT_CSD_DDR_BUS_WIDTH_8 },
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 6f27d35081b8..c63ad03c29c7 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -661,7 +661,8 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
 
 	/* SPI mode doesn't define CMD19 */
 	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning)
-		err = card->host->ops->execute_tuning(card->host);
+		err = card->host->ops->execute_tuning(card->host,
+						      MMC_SEND_TUNING_BLOCK);
 
 out:
 	kfree(status);
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index b77f770ce5d1..bd7bacc950dc 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -14,6 +14,7 @@
 
 #include <linux/mmc/host.h>
 #include <linux/mmc/card.h>
+#include <linux/mmc/mmc.h>
 #include <linux/mmc/sdio.h>
 #include <linux/mmc/sdio_func.h>
 #include <linux/mmc/sdio_ids.h>
@@ -556,7 +557,8 @@ static int mmc_sdio_init_uhs_card(struct mmc_card *card)
 
 	/* Initialize and start re-tuning timer */
 	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning)
-		err = card->host->ops->execute_tuning(card->host);
+		err = card->host->ops->execute_tuning(card->host,
+						      MMC_SEND_TUNING_BLOCK);
 
 out:
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 9478a6bf1bb1..9f22ba572de0 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -216,6 +216,7 @@ struct mmc_card {
 #define MMC_STATE_ULTRAHIGHSPEED (1<<5)		/* card is in ultra high speed mode */
 #define MMC_CARD_SDXC		(1<<6)		/* card is SDXC */
 #define MMC_CARD_REMOVED	(1<<7)		/* card has been removed */
+#define MMC_STATE_HIGHSPEED_200	(1<<8)		/* card is in HS200 mode */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -374,6 +375,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_present(c)	((c)->state & MMC_STATE_PRESENT)
 #define mmc_card_readonly(c)	((c)->state & MMC_STATE_READONLY)
 #define mmc_card_highspeed(c)	((c)->state & MMC_STATE_HIGHSPEED)
+#define mmc_card_hs200(c)	((c)->state & MMC_STATE_HIGHSPEED_200)
 #define mmc_card_blockaddr(c)	((c)->state & MMC_STATE_BLOCKADDR)
 #define mmc_card_ddr_mode(c)	((c)->state & MMC_STATE_HIGHSPEED_DDR)
 #define mmc_card_uhs(c)		((c)->state & MMC_STATE_ULTRAHIGHSPEED)
@@ -384,6 +386,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
 #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
 #define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED)
+#define mmc_card_set_hs200(c)	((c)->state |= MMC_STATE_HIGHSPEED_200)
 #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
 #define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR)
 #define mmc_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 031d865167a2..dd13e0539092 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -56,10 +56,13 @@ struct mmc_ios {
 #define MMC_TIMING_UHS_SDR50	3
 #define MMC_TIMING_UHS_SDR104	4
 #define MMC_TIMING_UHS_DDR50	5
+#define MMC_TIMING_MMC_HS200	6
 
 #define MMC_SDR_MODE		0
 #define MMC_1_2V_DDR_MODE	1
 #define MMC_1_8V_DDR_MODE	2
+#define MMC_1_2V_SDR_MODE	3
+#define MMC_1_8V_SDR_MODE	4
 
 	unsigned char	signal_voltage;		/* signalling voltage (1.8V or 3.3V) */
 
@@ -148,7 +151,9 @@ struct mmc_host_ops {
 	void	(*init_card)(struct mmc_host *host, struct mmc_card *card);
 
 	int	(*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios);
-	int	(*execute_tuning)(struct mmc_host *host);
+
+	/* The tuning command opcode value is different for SD and eMMC cards */
+	int	(*execute_tuning)(struct mmc_host *host, u32 opcode);
 	void	(*enable_preset_value)(struct mmc_host *host, bool enable);
 	int	(*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv);
 	void	(*hw_reset)(struct mmc_host *host);
@@ -248,6 +253,10 @@ struct mmc_host {
 #define MMC_CAP2_POWEROFF_NOTIFY (1 << 2)	/* Notify poweroff supported */
 #define MMC_CAP2_NO_MULTI_READ	(1 << 3)	/* Multiblock reads don't work */
 #define MMC_CAP2_NO_SLEEP_CMD	(1 << 4)	/* Don't allow sleep command */
+#define MMC_CAP2_HS200_1_8V_SDR	(1 << 5)        /* can support */
+#define MMC_CAP2_HS200_1_2V_SDR	(1 << 6)        /* can support */
+#define MMC_CAP2_HS200		(MMC_CAP2_HS200_1_8V_SDR | \
+				 MMC_CAP2_HS200_1_2V_SDR)
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 	unsigned int        power_notify_type;
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 665548e639e8..fb9f6e116e1c 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -51,6 +51,7 @@
 #define MMC_READ_SINGLE_BLOCK    17   /* adtc [31:0] data addr   R1  */
 #define MMC_READ_MULTIPLE_BLOCK  18   /* adtc [31:0] data addr   R1  */
 #define MMC_SEND_TUNING_BLOCK    19   /* adtc                    R1  */
+#define MMC_SEND_TUNING_BLOCK_HS200	21	/* adtc R1  */
 
   /* class 3 */
 #define MMC_WRITE_DAT_UNTIL_STOP 20   /* adtc [31:0] data addr   R1  */
@@ -339,13 +340,76 @@ struct _mmc_csd {
 
 #define EXT_CSD_CARD_TYPE_26	(1<<0)	/* Card can run at 26MHz */
 #define EXT_CSD_CARD_TYPE_52	(1<<1)	/* Card can run at 52MHz */
-#define EXT_CSD_CARD_TYPE_MASK	0xF	/* Mask out reserved bits */
+#define EXT_CSD_CARD_TYPE_MASK	0x3F	/* Mask out reserved bits */
 #define EXT_CSD_CARD_TYPE_DDR_1_8V  (1<<2)   /* Card can run at 52MHz */
 					     /* DDR mode @1.8V or 3V I/O */
 #define EXT_CSD_CARD_TYPE_DDR_1_2V  (1<<3)   /* Card can run at 52MHz */
 					     /* DDR mode @1.2V I/O */
 #define EXT_CSD_CARD_TYPE_DDR_52       (EXT_CSD_CARD_TYPE_DDR_1_8V  \
 					| EXT_CSD_CARD_TYPE_DDR_1_2V)
+#define EXT_CSD_CARD_TYPE_SDR_1_8V	(1<<4)	/* Card can run at 200MHz */
+#define EXT_CSD_CARD_TYPE_SDR_1_2V	(1<<5)	/* Card can run at 200MHz */
+						/* SDR mode @1.2V I/O */
+
+#define EXT_CSD_CARD_TYPE_SDR_200	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+					 EXT_CSD_CARD_TYPE_SDR_1_2V)
+
+#define EXT_CSD_CARD_TYPE_SDR_ALL	(EXT_CSD_CARD_TYPE_SDR_200 | \
+					 EXT_CSD_CARD_TYPE_52 | \
+					 EXT_CSD_CARD_TYPE_26)
+
+#define	EXT_CSD_CARD_TYPE_SDR_1_2V_ALL	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
+					 EXT_CSD_CARD_TYPE_52 | \
+					 EXT_CSD_CARD_TYPE_26)
+
+#define	EXT_CSD_CARD_TYPE_SDR_1_8V_ALL	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+					 EXT_CSD_CARD_TYPE_52 | \
+					 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_8V	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_DDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_8V	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_DDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_2V	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_DDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_2V	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_DDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_52	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_DDR_52 | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_52	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_DDR_52 | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_8V	(EXT_CSD_CARD_TYPE_SDR_200 | \
+						 EXT_CSD_CARD_TYPE_DDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_2V	(EXT_CSD_CARD_TYPE_SDR_200 | \
+						 EXT_CSD_CARD_TYPE_DDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_52	(EXT_CSD_CARD_TYPE_SDR_200 | \
+						 EXT_CSD_CARD_TYPE_DDR_52 | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
 
 #define EXT_CSD_BUS_WIDTH_1	0	/* Card is in 1 bit mode */
 #define EXT_CSD_BUS_WIDTH_4	1	/* Card is in 4 bit mode */
-- 
cgit v1.2.3


From 069c9f142822d552ec885572945d8bce9eff0519 Mon Sep 17 00:00:00 2001
From: Girish K S <girish.shivananjappa@linaro.org>
Date: Fri, 6 Jan 2012 09:56:39 +0530
Subject: mmc: host: Adds support for eMMC 4.5 HS200 mode

This patch adds support for the HS200 mode on the host side.
Also enables the tuning feature required when the HS200 mode
is selected.

Signed-off-by: Girish K S <girish.shivananjappa@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/sdhci.c  | 57 ++++++++++++++++++++++++++++++++++-------------
 drivers/mmc/host/sdhci.h  |  1 +
 include/linux/mmc/sdhci.h |  1 +
 3 files changed, 44 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 0636e9a587b1..96f4e548ba22 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -49,7 +49,7 @@ static void sdhci_finish_data(struct sdhci_host *);
 
 static void sdhci_send_command(struct sdhci_host *, struct mmc_command *);
 static void sdhci_finish_command(struct sdhci_host *);
-static int sdhci_execute_tuning(struct mmc_host *mmc);
+static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode);
 static void sdhci_tuning_timer(unsigned long data);
 
 #ifdef CONFIG_PM_RUNTIME
@@ -1014,7 +1014,8 @@ static void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
 		flags |= SDHCI_CMD_INDEX;
 
 	/* CMD19 is special in that the Data Present Select should be set */
-	if (cmd->data || (cmd->opcode == MMC_SEND_TUNING_BLOCK))
+	if (cmd->data || cmd->opcode == MMC_SEND_TUNING_BLOCK ||
+	    cmd->opcode == MMC_SEND_TUNING_BLOCK_HS200)
 		flags |= SDHCI_CMD_DATA;
 
 	sdhci_writew(host, SDHCI_MAKE_CMD(cmd->opcode, flags), SDHCI_COMMAND);
@@ -1287,7 +1288,7 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 		if ((host->flags & SDHCI_NEEDS_RETUNING) &&
 		    !(present_state & (SDHCI_DOING_WRITE | SDHCI_DOING_READ))) {
 			spin_unlock_irqrestore(&host->lock, flags);
-			sdhci_execute_tuning(mmc);
+			sdhci_execute_tuning(mmc, mrq->cmd->opcode);
 			spin_lock_irqsave(&host->lock, flags);
 
 			/* Restore original mmc_request structure */
@@ -1382,7 +1383,8 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 		unsigned int clock;
 
 		/* In case of UHS-I modes, set High Speed Enable */
-		if ((ios->timing == MMC_TIMING_UHS_SDR50) ||
+		if ((ios->timing == MMC_TIMING_MMC_HS200) ||
+		    (ios->timing == MMC_TIMING_UHS_SDR50) ||
 		    (ios->timing == MMC_TIMING_UHS_SDR104) ||
 		    (ios->timing == MMC_TIMING_UHS_DDR50) ||
 		    (ios->timing == MMC_TIMING_UHS_SDR25))
@@ -1435,7 +1437,9 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 			ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
 			/* Select Bus Speed Mode for host */
 			ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
-			if (ios->timing == MMC_TIMING_UHS_SDR12)
+			if (ios->timing == MMC_TIMING_MMC_HS200)
+				ctrl_2 |= SDHCI_CTRL_HS_SDR200;
+			else if (ios->timing == MMC_TIMING_UHS_SDR12)
 				ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
 			else if (ios->timing == MMC_TIMING_UHS_SDR25)
 				ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
@@ -1682,7 +1686,7 @@ static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
 	return err;
 }
 
-static int sdhci_execute_tuning(struct mmc_host *mmc)
+static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 {
 	struct sdhci_host *host;
 	u16 ctrl;
@@ -1690,6 +1694,7 @@ static int sdhci_execute_tuning(struct mmc_host *mmc)
 	int tuning_loop_counter = MAX_TUNING_LOOP;
 	unsigned long timeout;
 	int err = 0;
+	bool requires_tuning_nonuhs = false;
 
 	host = mmc_priv(mmc);
 
@@ -1700,13 +1705,19 @@ static int sdhci_execute_tuning(struct mmc_host *mmc)
 	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
 
 	/*
-	 * Host Controller needs tuning only in case of SDR104 mode
-	 * and for SDR50 mode when Use Tuning for SDR50 is set in
+	 * The Host Controller needs tuning only in case of SDR104 mode
+	 * and for SDR50 mode when Use Tuning for SDR50 is set in the
 	 * Capabilities register.
+	 * If the Host Controller supports the HS200 mode then the
+	 * tuning function has to be executed.
 	 */
+	if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) &&
+	    (host->flags & SDHCI_SDR50_NEEDS_TUNING ||
+	     host->flags & SDHCI_HS200_NEEDS_TUNING))
+		requires_tuning_nonuhs = true;
+
 	if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR104) ||
-	    (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) &&
-	    (host->flags & SDHCI_SDR50_NEEDS_TUNING)))
+	    requires_tuning_nonuhs)
 		ctrl |= SDHCI_CTRL_EXEC_TUNING;
 	else {
 		spin_unlock(&host->lock);
@@ -1742,7 +1753,7 @@ static int sdhci_execute_tuning(struct mmc_host *mmc)
 		if (!tuning_loop_counter && !timeout)
 			break;
 
-		cmd.opcode = MMC_SEND_TUNING_BLOCK;
+		cmd.opcode = opcode;
 		cmd.arg = 0;
 		cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC;
 		cmd.retries = 0;
@@ -1757,7 +1768,17 @@ static int sdhci_execute_tuning(struct mmc_host *mmc)
 		 * block to the Host Controller. So we set the block size
 		 * to 64 here.
 		 */
-		sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64), SDHCI_BLOCK_SIZE);
+		if (cmd.opcode == MMC_SEND_TUNING_BLOCK_HS200) {
+			if (mmc->ios.bus_width == MMC_BUS_WIDTH_8)
+				sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 128),
+					     SDHCI_BLOCK_SIZE);
+			else if (mmc->ios.bus_width == MMC_BUS_WIDTH_4)
+				sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64),
+					     SDHCI_BLOCK_SIZE);
+		} else {
+			sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64),
+				     SDHCI_BLOCK_SIZE);
+		}
 
 		/*
 		 * The tuning block is sent by the card to the host controller.
@@ -2140,12 +2161,14 @@ static void sdhci_show_adma_error(struct sdhci_host *host) { }
 
 static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 {
+	u32 command;
 	BUG_ON(intmask == 0);
 
 	/* CMD19 generates _only_ Buffer Read Ready interrupt */
 	if (intmask & SDHCI_INT_DATA_AVAIL) {
-		if (SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND)) ==
-		    MMC_SEND_TUNING_BLOCK) {
+		command = SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND));
+		if (command == MMC_SEND_TUNING_BLOCK ||
+		    command == MMC_SEND_TUNING_BLOCK_HS200) {
 			host->tuning_done = 1;
 			wake_up(&host->buf_ready_int);
 			return;
@@ -2747,10 +2770,14 @@ int sdhci_add_host(struct sdhci_host *host)
 	if (caps[1] & SDHCI_SUPPORT_DDR50)
 		mmc->caps |= MMC_CAP_UHS_DDR50;
 
-	/* Does the host needs tuning for SDR50? */
+	/* Does the host need tuning for SDR50? */
 	if (caps[1] & SDHCI_USE_SDR50_TUNING)
 		host->flags |= SDHCI_SDR50_NEEDS_TUNING;
 
+	/* Does the host need tuning for HS200? */
+	if (mmc->caps2 & MMC_CAP2_HS200)
+		host->flags |= SDHCI_HS200_NEEDS_TUNING;
+
 	/* Driver Type(s) (A, C, D) supported by the host */
 	if (caps[1] & SDHCI_DRIVER_TYPE_A)
 		mmc->caps |= MMC_CAP_DRIVER_TYPE_A;
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index a04d4d0c6fd2..ad265b96b75b 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -158,6 +158,7 @@
 #define   SDHCI_CTRL_UHS_SDR50		0x0002
 #define   SDHCI_CTRL_UHS_SDR104		0x0003
 #define   SDHCI_CTRL_UHS_DDR50		0x0004
+#define   SDHCI_CTRL_HS_SDR200		0x0005 /* reserved value in SDIO spec */
 #define  SDHCI_CTRL_VDD_180		0x0008
 #define  SDHCI_CTRL_DRV_TYPE_MASK	0x0030
 #define   SDHCI_CTRL_DRV_TYPE_B		0x0000
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index dad7a469f09c..c750f85177d9 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -119,6 +119,7 @@ struct sdhci_host {
 #define SDHCI_AUTO_CMD23	(1<<7)	/* Auto CMD23 support */
 #define SDHCI_PV_ENABLED	(1<<8)	/* Preset value enabled */
 #define SDHCI_SDIO_IRQ_ENABLED	(1<<9)	/* SDIO irq enabled */
+#define SDHCI_HS200_NEEDS_TUNING (1<<10)	/* HS200 needs tuning */
 
 	unsigned int version;	/* SDHCI spec. version */
 
-- 
cgit v1.2.3


From bd77c04772da38fca510c81f78e51f727123b919 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 13 Jan 2012 09:32:14 +1030
Subject: module: struct module_ref should contains long fields

module_ref contains two "unsigned int" fields.

That's now too small, since some machines can open more than 2^32 files.

Check commit 518de9b39e8 (fs: allow for more than 2^31 files) for
reference.

We can add an aligned(2 * sizeof(unsigned long)) attribute to force
alloc_percpu() to allocate each module_ref area within a single cache line.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Rusty Russell <rusty@rustcorp.com.au>
CC: Tejun Heo <tj@kernel.org>
CC: Robin Holt <holt@sgi.com>
CC: David Miller <davem@davemloft.net>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/module.h      | 21 ++++++++++++++++-----
 kernel/debug/kdb/kdb_main.c |  2 +-
 kernel/module.c             |  8 ++++----
 3 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 3cb7839a60b9..4598bf03e98b 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -205,6 +205,20 @@ enum module_state
 	MODULE_STATE_GOING,
 };
 
+/**
+ * struct module_ref - per cpu module reference counts
+ * @incs: number of module get on this cpu
+ * @decs: number of module put on this cpu
+ *
+ * We force an alignment on 8 or 16 bytes, so that alloc_percpu()
+ * puts @incs/@decs in the same cache line, with no extra memory cost,
+ * since alloc_percpu() is fine grained.
+ */
+struct module_ref {
+	unsigned long incs;
+	unsigned long decs;
+} __attribute((aligned(2 * sizeof(unsigned long))));
+
 struct module
 {
 	enum module_state state;
@@ -347,10 +361,7 @@ struct module
 	/* Destruction function. */
 	void (*exit)(void);
 
-	struct module_ref {
-		unsigned int incs;
-		unsigned int decs;
-	} __percpu *refptr;
+	struct module_ref __percpu *refptr;
 #endif
 
 #ifdef CONFIG_CONSTRUCTORS
@@ -434,7 +445,7 @@ extern void __module_put_and_exit(struct module *mod, long code)
 #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code);
 
 #ifdef CONFIG_MODULE_UNLOAD
-unsigned int module_refcount(struct module *mod);
+unsigned long module_refcount(struct module *mod);
 void __symbol_put(const char *symbol);
 #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x)
 void symbol_put_addr(void *addr);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 63786e71a3cd..e2ae7349437f 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1982,7 +1982,7 @@ static int kdb_lsmod(int argc, const char **argv)
 		kdb_printf("%-20s%8u  0x%p ", mod->name,
 			   mod->core_size, (void *)mod);
 #ifdef CONFIG_MODULE_UNLOAD
-		kdb_printf("%4d ", module_refcount(mod));
+		kdb_printf("%4ld ", module_refcount(mod));
 #endif
 		if (mod->state == MODULE_STATE_GOING)
 			kdb_printf(" (Unloading)");
diff --git a/kernel/module.c b/kernel/module.c
index 4928cffc3dcc..14b8e82e05d4 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -725,9 +725,9 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
 	}
 }
 
-unsigned int module_refcount(struct module *mod)
+unsigned long module_refcount(struct module *mod)
 {
-	unsigned int incs = 0, decs = 0;
+	unsigned long incs = 0, decs = 0;
 	int cpu;
 
 	for_each_possible_cpu(cpu)
@@ -853,7 +853,7 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
 	struct module_use *use;
 	int printed_something = 0;
 
-	seq_printf(m, " %u ", module_refcount(mod));
+	seq_printf(m, " %lu ", module_refcount(mod));
 
 	/* Always include a trailing , so userspace can differentiate
            between this and the old multi-field proc format. */
@@ -903,7 +903,7 @@ EXPORT_SYMBOL_GPL(symbol_put_addr);
 static ssize_t show_refcnt(struct module_attribute *mattr,
 			   struct module_kobject *mk, char *buffer)
 {
-	return sprintf(buffer, "%u\n", module_refcount(mk->mod));
+	return sprintf(buffer, "%lu\n", module_refcount(mk->mod));
 }
 
 static struct module_attribute refcnt = {
-- 
cgit v1.2.3


From bafeafeab94b8d3019aac15c2df2ce47b08a6363 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 13 Jan 2012 09:32:16 +1030
Subject: module_param: check type correctness for module_param_array

module_param_array(), unlike its non-array cousins, didn't check the type
of the variable.  Fixing this found two bugs.

Cc: Luca Risolia <luca.risolia@studio.unibo.it>
Cc: Mauro Carvalho Chehab <mchehab@infradead.org>
Cc: Eric Piel <eric.piel@tremplin-utc.net>
Cc: linux-media@vger.kernel.org
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/media/video/et61x251/et61x251_core.c | 4 ++--
 drivers/media/video/sn9c102/sn9c102_core.c   | 4 ++--
 drivers/mfd/janz-cmodio.c                    | 2 +-
 drivers/misc/lis3lv02d/lis3lv02d.c           | 2 ++
 include/linux/moduleparam.h                  | 1 +
 5 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/video/et61x251/et61x251_core.c b/drivers/media/video/et61x251/et61x251_core.c
index 40f214ab924f..5539f09440ac 100644
--- a/drivers/media/video/et61x251/et61x251_core.c
+++ b/drivers/media/video/et61x251/et61x251_core.c
@@ -76,8 +76,8 @@ MODULE_PARM_DESC(video_nr,
 		 "\none and for every other camera."
 		 "\n");
 
-static short force_munmap[] = {[0 ... ET61X251_MAX_DEVICES-1] =
-			       ET61X251_FORCE_MUNMAP};
+static bool force_munmap[] = {[0 ... ET61X251_MAX_DEVICES-1] =
+			      ET61X251_FORCE_MUNMAP};
 module_param_array(force_munmap, bool, NULL, 0444);
 MODULE_PARM_DESC(force_munmap,
 		 "\n<0|1[,...]> Force the application to unmap previously"
diff --git a/drivers/media/video/sn9c102/sn9c102_core.c b/drivers/media/video/sn9c102/sn9c102_core.c
index 7025be129286..c2882fa5be85 100644
--- a/drivers/media/video/sn9c102/sn9c102_core.c
+++ b/drivers/media/video/sn9c102/sn9c102_core.c
@@ -75,8 +75,8 @@ MODULE_PARM_DESC(video_nr,
 		 "\none and for every other camera."
 		 "\n");
 
-static short force_munmap[] = {[0 ... SN9C102_MAX_DEVICES-1] =
-			       SN9C102_FORCE_MUNMAP};
+static bool force_munmap[] = {[0 ... SN9C102_MAX_DEVICES-1] =
+			      SN9C102_FORCE_MUNMAP};
 module_param_array(force_munmap, bool, NULL, 0444);
 MODULE_PARM_DESC(force_munmap,
 		 " <0|1[,...]>"
diff --git a/drivers/mfd/janz-cmodio.c b/drivers/mfd/janz-cmodio.c
index 5c2a06acb77f..a9223ed1b7c5 100644
--- a/drivers/mfd/janz-cmodio.c
+++ b/drivers/mfd/janz-cmodio.c
@@ -33,7 +33,7 @@
 
 /* Module Parameters */
 static unsigned int num_modules = CMODIO_MAX_MODULES;
-static unsigned char *modules[CMODIO_MAX_MODULES] = {
+static char *modules[CMODIO_MAX_MODULES] = {
 	"empty", "empty", "empty", "empty",
 };
 
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c
index 29d12a70eb1b..a981e2a42f92 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d.c
@@ -111,6 +111,8 @@ static struct kernel_param_ops param_ops_axis = {
 	.get = param_get_int,
 };
 
+#define param_check_axis(name, p) param_check_int(name, p)
+
 module_param_array_named(axes, lis3_dev.ac.as_array, axis, NULL, 0644);
 MODULE_PARM_DESC(axes, "Axis-mapping for x,y,z directions");
 
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 7939f636c8ba..794d4b0f1215 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -395,6 +395,7 @@ extern int param_get_invbool(char *buffer, const struct kernel_param *kp);
  * module_param_named() for why this might be necessary.
  */
 #define module_param_array_named(name, array, type, nump, perm)		\
+	param_check_##type(name, &(array)[0]);				\
 	static const struct kparam_array __param_arr_##name		\
 	= { .max = ARRAY_SIZE(array), .num = nump,                      \
 	    .ops = &param_ops_##type,					\
-- 
cgit v1.2.3


From 69116f279a9eaf4c540934269342d9149538fc79 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 13 Jan 2012 09:32:17 +1030
Subject: module_param: avoid bool abuse, add bint for special cases.

For historical reasons, we allow module_param(bool) to take an int (or
an unsigned int).  That's going away.

A few drivers really want an int: they set it to -1 and a parameter
will set it to 0 or 1.  This sucks: reading them from sysfs will give
'Y' for both -1 and 1, but if we change it to an int, then the users
might be broken (if they did "param" instead of "param=1").

Use a new 'bint' parser for them.

(ntfs has a different problem: it needs an int for debug_msgs because
it's also exposed via sysctl.)

Cc: Steve Glendinning <steve.glendinning@smsc.com>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Guenter Roeck <guenter.roeck@ericsson.com>
Cc: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
Cc: Christoph Raisch <raisch@de.ibm.com>
Cc: Roland Dreier <roland@kernel.org>
Cc: Sean Hefty <sean.hefty@intel.com>
Cc: Hal Rosenstock <hal.rosenstock@gmail.com>
Cc: linux390@de.ibm.com
Cc: Anton Altaparmakov <anton@tuxera.com>
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: lm-sensors@lm-sensors.org
Cc: linux-rdma@vger.kernel.org
Cc: linux-s390@vger.kernel.org
Cc: linux-ntfs-dev@lists.sourceforge.net
Cc: alsa-devel@alsa-project.org
Acked-by: Takashi Iwai <tiwai@suse.de> (For the sound part)
Acked-by: Guenter Roeck <guenter.roeck@ericsson.com> (For the hwmon driver)
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/hwmon/emc2103.c                |  2 +-
 drivers/infiniband/hw/ehca/ehca_main.c |  2 +-
 drivers/s390/cio/cmf.c                 |  2 +-
 fs/ntfs/super.c                        |  2 +-
 include/linux/moduleparam.h            |  6 ++++++
 kernel/params.c                        | 24 ++++++++++++++++++++++++
 sound/pci/intel8x0.c                   |  4 ++--
 7 files changed, 36 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/emc2103.c b/drivers/hwmon/emc2103.c
index 848a2b0bc83f..865063914d76 100644
--- a/drivers/hwmon/emc2103.c
+++ b/drivers/hwmon/emc2103.c
@@ -55,7 +55,7 @@ static const u8 REG_TEMP_MAX[4] = { 0x34, 0x30, 0x31, 0x32 };
  * it.  Default is to leave the device in the state it's already in (-1).
  * This parameter allows APD mode to be optionally forced on or off */
 static int apd = -1;
-module_param(apd, bool, 0);
+module_param(apd, bint, 0);
 MODULE_PARM_DESC(init, "Set to zero to disable anti-parallel diode mode");
 
 struct temperature {
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index c240e9972cb0..8af8d4f7bdb1 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -82,7 +82,7 @@ module_param_named(port_act_time, ehca_port_act_time, int,  S_IRUGO);
 module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  bool, S_IRUGO);
 module_param_named(static_rate,   ehca_static_rate,   int,  S_IRUGO);
 module_param_named(scaling_code,  ehca_scaling_code,  bool, S_IRUGO);
-module_param_named(lock_hcalls,   ehca_lock_hcalls,   bool, S_IRUGO);
+module_param_named(lock_hcalls,   ehca_lock_hcalls,   bint, S_IRUGO);
 module_param_named(number_of_cqs, ehca_max_cq,        int,  S_IRUGO);
 module_param_named(number_of_qps, ehca_max_qp,        int,  S_IRUGO);
 
diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c
index 2985eb439485..204ca728e7fd 100644
--- a/drivers/s390/cio/cmf.c
+++ b/drivers/s390/cio/cmf.c
@@ -98,7 +98,7 @@ enum cmb_format {
  * enum cmb_format.
  */
 static int format = CMF_AUTODETECT;
-module_param(format, bool, 0444);
+module_param(format, bint, 0444);
 
 /**
  * struct cmb_operations - functions to use depending on cmb_format
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 608be4516091..5a4a8af5c406 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3198,7 +3198,7 @@ MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2011 Anton Altaparm
 MODULE_VERSION(NTFS_VERSION);
 MODULE_LICENSE("GPL");
 #ifdef DEBUG
-module_param(debug_msgs, bool, 0);
+module_param(debug_msgs, bint, 0);
 MODULE_PARM_DESC(debug_msgs, "Enable debug messages.");
 #endif
 
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 794d4b0f1215..6bdde0c3bcca 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -367,6 +367,12 @@ extern int param_set_invbool(const char *val, const struct kernel_param *kp);
 extern int param_get_invbool(char *buffer, const struct kernel_param *kp);
 #define param_check_invbool(name, p) __param_check(name, p, bool)
 
+/* An int, which can only be set like a bool (though it shows as an int). */
+extern struct kernel_param_ops param_ops_bint;
+extern int param_set_bint(const char *val, const struct kernel_param *kp);
+#define param_get_bint param_get_int
+#define param_check_bint param_check_int
+
 /**
  * module_param_array - a parameter which is an array of some type
  * @name: the name of the array variable
diff --git a/kernel/params.c b/kernel/params.c
index 9240664af110..32ee04308285 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -363,6 +363,30 @@ struct kernel_param_ops param_ops_invbool = {
 };
 EXPORT_SYMBOL(param_ops_invbool);
 
+int param_set_bint(const char *val, const struct kernel_param *kp)
+{
+	struct kernel_param boolkp;
+	bool v;
+	int ret;
+
+	/* Match bool exactly, by re-using it. */
+	boolkp = *kp;
+	boolkp.arg = &v;
+	boolkp.flags |= KPARAM_ISBOOL;
+
+	ret = param_set_bool(val, &boolkp);
+	if (ret == 0)
+		*(int *)kp->arg = v;
+	return ret;
+}
+EXPORT_SYMBOL(param_set_bint);
+
+struct kernel_param_ops param_ops_bint = {
+	.set = param_set_bint,
+	.get = param_get_int,
+};
+EXPORT_SYMBOL(param_ops_bint);
+
 /* We break the rule and mangle the string. */
 static int param_array(const char *name,
 		       const char *val,
diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index 40b181bab930..9f3b01bb72c8 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -95,13 +95,13 @@ module_param(ac97_quirk, charp, 0444);
 MODULE_PARM_DESC(ac97_quirk, "AC'97 workaround for strange hardware.");
 module_param(buggy_semaphore, bool, 0444);
 MODULE_PARM_DESC(buggy_semaphore, "Enable workaround for hardwares with problematic codec semaphores.");
-module_param(buggy_irq, bool, 0444);
+module_param(buggy_irq, bint, 0444);
 MODULE_PARM_DESC(buggy_irq, "Enable workaround for buggy interrupts on some motherboards.");
 module_param(xbox, bool, 0444);
 MODULE_PARM_DESC(xbox, "Set to 1 for Xbox, if you have problems with the AC'97 codec detection.");
 module_param(spdif_aclink, int, 0444);
 MODULE_PARM_DESC(spdif_aclink, "S/PDIF over AC-link.");
-module_param(inside_vm, bool, 0444);
+module_param(inside_vm, bint, 0444);
 MODULE_PARM_DESC(inside_vm, "KVM/Parallels optimization.");
 
 /* just for backward compatibility */
-- 
cgit v1.2.3


From 2329abfa344a9a824bc4c71f2415528777265510 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 13 Jan 2012 09:32:18 +1030
Subject: module_param: make bool parameters really bool (core code)

module_param(bool) used to counter-intuitively take an int.  In
fddd5201 (mid-2009) we allowed bool or int/unsigned int using a messy
trick.

It's time to remove the int/unsigned int option.  For this version
it'll simply give a warning, but it'll break in the next kernel version.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/init.h   | 3 ++-
 init/main.c            | 2 +-
 kernel/irq/internals.h | 2 +-
 kernel/irq/spurious.c  | 2 +-
 kernel/printk.c        | 8 ++++----
 5 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index 9146f39cdddf..6b951095a42f 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -2,6 +2,7 @@
 #define _LINUX_INIT_H
 
 #include <linux/compiler.h>
+#include <linux/types.h>
 
 /* These macros are used to mark some functions or 
  * initialized data (doesn't apply to uninitialized data)
@@ -156,7 +157,7 @@ void prepare_namespace(void);
 
 extern void (*late_time_init)(void);
 
-extern int initcall_debug;
+extern bool initcall_debug;
 
 #endif
   
diff --git a/init/main.c b/init/main.c
index 415548e808d2..ff49a6dacfbb 100644
--- a/init/main.c
+++ b/init/main.c
@@ -648,7 +648,7 @@ static void __init do_ctors(void)
 #endif
 }
 
-int initcall_debug;
+bool initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
 
 static char msgbuf[64];
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index a73dd6c7372d..b7952316016a 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -15,7 +15,7 @@
 
 #define istate core_internal_state__do_not_mess_with_it
 
-extern int noirqdebug;
+extern bool noirqdebug;
 
 /*
  * Bits used by threaded handlers:
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index dc813a948be2..611cd6003c45 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -325,7 +325,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
 	desc->irqs_unhandled = 0;
 }
 
-int noirqdebug __read_mostly;
+bool noirqdebug __read_mostly;
 
 int noirqdebug_setup(char *str)
 {
diff --git a/kernel/printk.c b/kernel/printk.c
index 63b3bc31fe32..13c0a1143f49 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -521,7 +521,7 @@ static void __call_console_drivers(unsigned start, unsigned end)
 	}
 }
 
-static int __read_mostly ignore_loglevel;
+static bool __read_mostly ignore_loglevel;
 
 static int __init ignore_loglevel_setup(char *str)
 {
@@ -696,9 +696,9 @@ static void zap_locks(void)
 }
 
 #if defined(CONFIG_PRINTK_TIME)
-static int printk_time = 1;
+static bool printk_time = 1;
 #else
-static int printk_time = 0;
+static bool printk_time = 0;
 #endif
 module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
 
@@ -1098,7 +1098,7 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha
 	return -1;
 }
 
-int console_suspend_enabled = 1;
+bool console_suspend_enabled = 1;
 EXPORT_SYMBOL(console_suspend_enabled);
 
 static int __init console_suspend_disable(char *str)
-- 
cgit v1.2.3


From 90ab5ee94171b3e28de6bb42ee30b527014e0be7 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 13 Jan 2012 09:32:20 +1030
Subject: module_param: make bool parameters really bool (drivers & misc)

module_param(bool) used to counter-intuitively take an int.  In
fddd5201 (mid-2009) we allowed bool or int/unsigned int using a messy
trick.

It's time to remove the int/unsigned int option.  For this version
it'll simply give a warning, but it'll break in the next kernel version.

Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/accessibility/braille/braille_console.c |  2 +-
 drivers/acpi/acpica/acglobal.h                  |  2 +-
 drivers/acpi/apei/ghes.c                        |  2 +-
 drivers/acpi/apei/hest.c                        |  2 +-
 drivers/acpi/dock.c                             |  2 +-
 drivers/acpi/pci_slot.c                         |  2 +-
 drivers/acpi/video.c                            |  6 +++---
 drivers/ata/sata_nv.c                           |  6 +++---
 drivers/ata/sata_sil24.c                        |  2 +-
 drivers/atm/he.c                                |  6 +++---
 drivers/block/drbd/drbd_int.h                   |  4 ++--
 drivers/block/drbd/drbd_main.c                  |  4 ++--
 drivers/block/paride/bpck6.c                    |  5 ++---
 drivers/block/paride/pd.c                       |  3 ++-
 drivers/block/paride/pf.c                       |  4 +++-
 drivers/block/paride/pg.c                       |  3 ++-
 drivers/block/paride/pt.c                       |  4 +++-
 drivers/block/xd.c                              |  2 +-
 drivers/bluetooth/btusb.c                       | 12 ++++++------
 drivers/bluetooth/hci_bcsp.c                    |  4 ++--
 drivers/bluetooth/hci_ldisc.c                   |  2 +-
 drivers/cdrom/cdrom.c                           | 12 ++++++------
 drivers/char/agp/amd64-agp.c                    |  2 +-
 drivers/char/agp/sis-agp.c                      |  2 +-
 drivers/char/i8k.c                              |  8 ++++----
 drivers/char/ipmi/ipmi_si_intf.c                |  2 +-
 drivers/char/lp.c                               |  2 +-
 drivers/char/nwflash.c                          |  2 +-
 drivers/char/pcmcia/synclink_cs.c               |  2 +-
 drivers/char/random.c                           |  2 +-
 drivers/char/tpm/tpm_tis.c                      |  6 +++---
 drivers/edac/r82600_edac.c                      |  2 +-
 drivers/firewire/sbp2.c                         |  2 +-
 drivers/hid/hid-prodikeys.c                     |  2 +-
 drivers/hwmon/abituguru.c                       |  2 +-
 drivers/hwmon/abituguru3.c                      |  4 ++--
 drivers/hwmon/acpi_power_meter.c                |  2 +-
 drivers/hwmon/adm1021.c                         |  2 +-
 drivers/hwmon/ads7828.c                         |  4 ++--
 drivers/hwmon/dme1737.c                         |  4 ++--
 drivers/hwmon/it87.c                            |  4 ++--
 drivers/hwmon/lm93.c                            |  4 ++--
 drivers/hwmon/max1668.c                         |  2 +-
 drivers/hwmon/w83627hf.c                        |  2 +-
 drivers/hwmon/w83781d.c                         |  4 ++--
 drivers/hwmon/w83791d.c                         |  4 ++--
 drivers/hwmon/w83792d.c                         |  2 +-
 drivers/hwmon/w83793.c                          |  2 +-
 drivers/hwmon/w83795.c                          |  2 +-
 drivers/hwmon/w83l786ng.c                       |  2 +-
 drivers/i2c/busses/i2c-highlander.c             |  2 +-
 drivers/i2c/busses/i2c-ibm_iic.c                |  4 ++--
 drivers/i2c/busses/i2c-sis630.c                 |  4 ++--
 drivers/i2c/busses/i2c-viapro.c                 |  2 +-
 drivers/ide/ali14xx.c                           |  2 +-
 drivers/ide/cmd640.c                            |  2 +-
 drivers/ide/dtc2278.c                           |  2 +-
 drivers/ide/gayle.c                             |  2 +-
 drivers/ide/ht6560b.c                           |  2 +-
 drivers/ide/ide-4drives.c                       |  2 +-
 drivers/ide/ide-acpi.c                          |  6 +++---
 drivers/ide/ide-pci-generic.c                   |  2 +-
 drivers/ide/qd65xx.c                            |  2 +-
 drivers/ide/umc8672.c                           |  2 +-
 drivers/infiniband/hw/ehca/ehca_classes.h       |  4 ++--
 drivers/infiniband/hw/ehca/ehca_main.c          |  8 ++++----
 drivers/infiniband/hw/nes/nes.c                 |  2 +-
 drivers/input/joystick/xpad.c                   |  6 +++---
 drivers/input/misc/wistron_btns.c               |  2 +-
 drivers/input/mouse/psmouse-base.c              |  2 +-
 drivers/input/mouse/synaptics_i2c.c             |  6 +++---
 drivers/input/serio/hp_sdc.c                    |  2 +-
 drivers/input/touchscreen/eeti_ts.c             |  4 ++--
 drivers/input/touchscreen/htcpen.c              |  4 ++--
 drivers/input/touchscreen/ucb1400_ts.c          |  2 +-
 drivers/input/touchscreen/usbtouchscreen.c      |  4 ++--
 drivers/isdn/hardware/avm/b1dma.c               |  2 +-
 drivers/isdn/hardware/avm/c4.c                  |  2 +-
 drivers/isdn/sc/init.c                          |  2 +-
 drivers/leds/leds-clevo-mail.c                  |  2 +-
 drivers/leds/leds-ss4200.c                      |  2 +-
 drivers/macintosh/ams/ams-core.c                |  2 +-
 drivers/macintosh/ams/ams-input.c               |  4 ++--
 drivers/macintosh/therm_adt746x.c               |  2 +-
 drivers/media/dvb/dvb-usb/af9005.c              |  2 +-
 drivers/media/dvb/dvb-usb/af9005.h              |  2 +-
 drivers/media/radio/radio-gemtek.c              | 10 +++++-----
 drivers/media/radio/radio-miropcm20.c           |  2 +-
 drivers/media/rc/lirc_dev.c                     |  2 +-
 drivers/media/rc/mceusb.c                       |  4 ++--
 drivers/media/rc/streamzap.c                    |  4 ++--
 drivers/media/rc/winbond-cir.c                  |  4 ++--
 drivers/media/video/c-qcam.c                    |  2 +-
 drivers/media/video/cs5345.c                    |  2 +-
 drivers/media/video/cs53l32a.c                  |  2 +-
 drivers/media/video/cx18/cx18-driver.c          |  2 +-
 drivers/media/video/cx25821/cx25821-alsa.c      |  2 +-
 drivers/media/video/cx88/cx88-alsa.c            |  2 +-
 drivers/media/video/gspca/m5602/m5602_core.c    |  4 ++--
 drivers/media/video/gspca/m5602/m5602_mt9m111.h |  2 +-
 drivers/media/video/gspca/m5602/m5602_ov7660.h  |  2 +-
 drivers/media/video/gspca/m5602/m5602_ov9650.h  |  2 +-
 drivers/media/video/gspca/m5602/m5602_po1030.h  |  2 +-
 drivers/media/video/gspca/m5602/m5602_s5k4aa.h  |  2 +-
 drivers/media/video/gspca/m5602/m5602_s5k83a.h  |  2 +-
 drivers/media/video/gspca/stv06xx/stv06xx.c     |  4 ++--
 drivers/media/video/hdpvr/hdpvr-core.c          |  2 +-
 drivers/media/video/ivtv/ivtv-driver.c          |  2 +-
 drivers/media/video/ivtv/ivtvfb.c               |  2 +-
 drivers/media/video/marvell-ccic/mcam-core.c    |  6 +++---
 drivers/media/video/msp3400-driver.c            |  6 +++---
 drivers/media/video/msp3400-driver.h            |  6 +++---
 drivers/media/video/omap/omap_vout.c            |  6 +++---
 drivers/media/video/omap/omap_vout_vrfb.c       |  2 +-
 drivers/media/video/ov7670.c                    |  2 +-
 drivers/media/video/saa7115.c                   |  2 +-
 drivers/media/video/stk-webcam.c                |  4 ++--
 drivers/media/video/tm6000/tm6000-alsa.c        |  2 +-
 drivers/media/video/tvp514x.c                   |  2 +-
 drivers/media/video/tvp7002.c                   |  2 +-
 drivers/media/video/upd64083.c                  |  2 +-
 drivers/media/video/via-camera.c                |  4 ++--
 drivers/media/video/zoran/zoran_device.c        |  2 +-
 drivers/media/video/zoran/zr36060.c             |  2 +-
 drivers/memstick/host/jmb38x_ms.c               |  2 +-
 drivers/memstick/host/r592.c                    |  2 +-
 drivers/memstick/host/tifm_ms.c                 |  2 +-
 drivers/misc/iwmc3200top/main.c                 | 12 ++++++------
 drivers/mmc/core/core.c                         |  6 +++---
 drivers/mmc/core/core.h                         |  2 +-
 drivers/mmc/host/tifm_sd.c                      |  4 ++--
 drivers/mmc/host/vub300.c                       | 10 +++++-----
 drivers/mtd/nand/pxa3xx_nand.c                  |  2 +-
 drivers/mtd/nand/r852.c                         |  2 +-
 drivers/parport/parport_ip32.c                  |  2 +-
 drivers/pci/hotplug/acpi_pcihp.c                |  2 +-
 drivers/pci/hotplug/acpiphp_core.c              |  2 +-
 drivers/pci/hotplug/acpiphp_ibm.c               |  2 +-
 drivers/pci/hotplug/cpcihp_zt5550.c             |  4 ++--
 drivers/pci/hotplug/cpqphp_core.c               |  4 ++--
 drivers/pci/hotplug/ibmphp_core.c               |  2 +-
 drivers/pci/hotplug/pci_hotplug_core.c          |  2 +-
 drivers/pci/hotplug/pciehp.h                    |  6 +++---
 drivers/pci/hotplug/pciehp_core.c               |  6 +++---
 drivers/pci/hotplug/pcihp_skeleton.c            |  2 +-
 drivers/pci/hotplug/rpaphp.h                    |  2 +-
 drivers/pci/hotplug/rpaphp_core.c               |  2 +-
 drivers/pci/hotplug/shpchp.h                    |  4 ++--
 drivers/pci/hotplug/shpchp_core.c               |  4 ++--
 drivers/pci/pcie/aer/aer_inject.c               |  2 +-
 drivers/pci/pcie/aer/aerdrv_core.c              |  4 ++--
 drivers/pcmcia/yenta_socket.c                   |  6 +++---
 drivers/platform/x86/compal-laptop.c            |  2 +-
 drivers/platform/x86/intel_oaktrail.c           |  2 +-
 drivers/platform/x86/msi-laptop.c               |  2 +-
 drivers/platform/x86/samsung-laptop.c           |  4 ++--
 drivers/platform/x86/thinkpad_acpi.c            | 16 ++++++++--------
 drivers/platform/x86/wmi.c                      |  4 ++--
 drivers/power/ds2760_battery.c                  |  2 +-
 drivers/s390/char/raw3270.c                     |  2 +-
 drivers/s390/char/vmwatchdog.c                  |  4 ++--
 drivers/scsi/aha1542.c                          |  2 +-
 drivers/scsi/dc395x.c                           |  2 +-
 drivers/scsi/nsp32.c                            |  4 ++--
 drivers/scsi/pcmcia/nsp_cs.c                    |  2 +-
 drivers/staging/comedi/comedi_fops.c            |  2 +-
 drivers/staging/comedi/comedi_fops.h            |  3 ++-
 drivers/staging/media/go7007/snd-go7007.c       |  2 +-
 drivers/staging/media/lirc/lirc_bt829.c         |  2 +-
 drivers/staging/media/lirc/lirc_igorplugusb.c   |  4 ++--
 drivers/staging/media/lirc/lirc_parallel.c      |  4 ++--
 drivers/staging/media/lirc/lirc_serial.c        | 10 +++++-----
 drivers/staging/media/lirc/lirc_sir.c           |  2 +-
 drivers/staging/media/lirc/lirc_zilog.c         |  4 ++--
 drivers/staging/quatech_usb2/quatech_usb2.c     |  2 +-
 drivers/staging/serqt_usb2/serqt_usb2.c         |  2 +-
 drivers/staging/speakup/speakup.h               |  2 +-
 drivers/staging/speakup/synth.c                 |  2 +-
 drivers/staging/vme/bridges/vme_tsi148.c        |  2 +-
 drivers/tty/rocket.c                            |  2 +-
 drivers/tty/synclink.c                          |  2 +-
 drivers/tty/synclinkmp.c                        |  2 +-
 drivers/usb/atm/speedtch.c                      |  6 +++---
 drivers/usb/atm/ueagle-atm.c                    |  2 +-
 drivers/usb/core/devio.c                        |  2 +-
 drivers/usb/core/hub.c                          |  8 ++++----
 drivers/usb/core/usb.c                          |  2 +-
 drivers/usb/gadget/amd5536udc.c                 |  8 ++++----
 drivers/usb/gadget/ether.c                      |  4 ++--
 drivers/usb/gadget/file_storage.c               | 10 +++++-----
 drivers/usb/gadget/net2272.c                    |  2 +-
 drivers/usb/gadget/net2280.c                    |  6 +++---
 drivers/usb/gadget/omap_udc.c                   |  2 +-
 drivers/usb/gadget/pch_udc.c                    |  2 +-
 drivers/usb/gadget/serial.c                     |  4 ++--
 drivers/usb/gadget/zero.c                       |  2 +-
 drivers/usb/host/ehci-hcd.c                     |  2 +-
 drivers/usb/host/ohci-hcd.c                     |  4 ++--
 drivers/usb/host/oxu210hp-hcd.c                 |  2 +-
 drivers/usb/host/u132-hcd.c                     |  2 +-
 drivers/usb/host/uhci-hcd.c                     |  2 +-
 drivers/usb/misc/ftdi-elan.c                    |  2 +-
 drivers/usb/misc/iowarrior.c                    |  2 +-
 drivers/usb/musb/cppi_dma.c                     |  2 +-
 drivers/usb/musb/musb_core.c                    |  2 +-
 drivers/usb/serial/aircable.c                   |  2 +-
 drivers/usb/serial/ark3116.c                    |  2 +-
 drivers/usb/serial/belkin_sa.c                  |  2 +-
 drivers/usb/serial/ch341.c                      |  2 +-
 drivers/usb/serial/cp210x.c                     |  2 +-
 drivers/usb/serial/cyberjack.c                  |  2 +-
 drivers/usb/serial/cypress_m8.c                 |  6 +++---
 drivers/usb/serial/digi_acceleport.c            |  2 +-
 drivers/usb/serial/empeg.c                      |  2 +-
 drivers/usb/serial/ftdi_sio.c                   |  2 +-
 drivers/usb/serial/funsoft.c                    |  2 +-
 drivers/usb/serial/garmin_gps.c                 |  2 +-
 drivers/usb/serial/io_edgeport.c                |  2 +-
 drivers/usb/serial/io_ti.c                      |  4 ++--
 drivers/usb/serial/ipaq.c                       |  2 +-
 drivers/usb/serial/ipw.c                        |  2 +-
 drivers/usb/serial/ir-usb.c                     |  2 +-
 drivers/usb/serial/iuu_phoenix.c                |  6 +++---
 drivers/usb/serial/keyspan.c                    |  2 +-
 drivers/usb/serial/keyspan_pda.c                |  2 +-
 drivers/usb/serial/kl5kusb105.c                 |  2 +-
 drivers/usb/serial/mct_u232.c                   |  2 +-
 drivers/usb/serial/mos7720.c                    |  2 +-
 drivers/usb/serial/mos7840.c                    |  2 +-
 drivers/usb/serial/navman.c                     |  2 +-
 drivers/usb/serial/omninet.c                    |  2 +-
 drivers/usb/serial/opticon.c                    |  2 +-
 drivers/usb/serial/option.c                     |  2 +-
 drivers/usb/serial/oti6858.c                    |  2 +-
 drivers/usb/serial/pl2303.c                     |  2 +-
 drivers/usb/serial/qcserial.c                   |  2 +-
 drivers/usb/serial/safe_serial.c                |  6 +++---
 drivers/usb/serial/sierra.c                     |  4 ++--
 drivers/usb/serial/spcp8x5.c                    |  2 +-
 drivers/usb/serial/ssu100.c                     |  2 +-
 drivers/usb/serial/symbolserial.c               |  2 +-
 drivers/usb/serial/ti_usb_3410_5052.c           |  2 +-
 drivers/usb/serial/usb-serial.c                 |  2 +-
 drivers/usb/serial/usb_wwan.c                   |  2 +-
 drivers/usb/serial/visor.c                      |  2 +-
 drivers/usb/serial/whiteheat.c                  |  2 +-
 drivers/video/aty/atyfb_base.c                  |  4 ++--
 drivers/video/aty/radeon_base.c                 | 18 +++++++++---------
 drivers/video/cirrusfb.c                        |  2 +-
 drivers/video/hgafb.c                           |  2 +-
 drivers/video/intelfb/intelfbdrv.c              | 16 ++++++++--------
 drivers/video/logo/logo.c                       |  2 +-
 drivers/video/neofb.c                           | 10 +++++-----
 drivers/video/omap/omapfb_main.c                |  4 ++--
 drivers/video/omap2/dss/core.c                  |  2 +-
 drivers/video/omap2/dss/dsi.c                   |  4 ++--
 drivers/video/omap2/dss/dss.h                   |  2 +-
 drivers/video/omap2/omapfb/omapfb-main.c        |  8 ++++----
 drivers/video/omap2/omapfb/omapfb.h             |  2 +-
 drivers/video/pm2fb.c                           |  8 ++++----
 drivers/video/pm3fb.c                           |  4 ++--
 drivers/video/riva/fbdev.c                      |  6 +++---
 drivers/video/smscufx.c                         |  4 ++--
 drivers/video/sstfb.c                           |  6 +++---
 drivers/video/tdfxfb.c                          |  2 +-
 drivers/video/udlfb.c                           |  6 +++---
 drivers/video/uvesafb.c                         |  6 +++---
 drivers/video/vfb.c                             |  2 +-
 drivers/watchdog/f71808e_wdt.c                  |  2 +-
 drivers/watchdog/mpc8xxx_wdt.c                  |  2 +-
 drivers/xen/xen-pciback/conf_space.c            |  2 +-
 drivers/xen/xen-pciback/xenbus.c                |  2 +-
 fs/lockd/mon.c                                  |  2 +-
 fs/nfs/client.c                                 |  2 +-
 fs/nfs/inode.c                                  |  2 +-
 include/acpi/acpixf.h                           |  2 +-
 include/acpi/apei.h                             |  4 ++--
 include/linux/console.h                         |  2 +-
 include/linux/lockd/lockd.h                     |  2 +-
 include/linux/mmc/host.h                        |  2 +-
 security/apparmor/include/apparmor.h            | 10 +++++-----
 security/apparmor/lsm.c                         | 12 ++++++------
 virt/kvm/iommu.c                                |  2 +-
 283 files changed, 471 insertions(+), 465 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/accessibility/braille/braille_console.c b/drivers/accessibility/braille/braille_console.c
index cb423f5aef24..c339a0880e6e 100644
--- a/drivers/accessibility/braille/braille_console.c
+++ b/drivers/accessibility/braille/braille_console.c
@@ -44,7 +44,7 @@ MODULE_LICENSE("GPL");
  */
 
 /* Emit various sounds */
-static int sound;
+static bool sound;
 module_param(sound, bool, 0);
 MODULE_PARM_DESC(sound, "emit sounds");
 
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 76dc02f15574..e6652d716e45 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -108,7 +108,7 @@ u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE);
 /*
  * Optionally enable output from the AML Debug Object.
  */
-u32 ACPI_INIT_GLOBAL(acpi_gbl_enable_aml_debug_object, FALSE);
+bool ACPI_INIT_GLOBAL(acpi_gbl_enable_aml_debug_object, FALSE);
 
 /*
  * Optionally copy the entire DSDT to local memory (instead of simply
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index b8e08cb67a18..ebaf037a787b 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -118,7 +118,7 @@ struct ghes_estatus_cache {
 	struct rcu_head rcu;
 };
 
-int ghes_disable;
+bool ghes_disable;
 module_param_named(disable, ghes_disable, bool, 0);
 
 static int ghes_panic_timeout	__read_mostly = 30;
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 05fee06f4d6e..ee7fddc4665c 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -41,7 +41,7 @@
 
 #define HEST_PFX "HEST: "
 
-int hest_disable;
+bool hest_disable;
 EXPORT_SYMBOL_GPL(hest_disable);
 
 /* HEST table parsing */
diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c
index 19a61136d848..88eb14304667 100644
--- a/drivers/acpi/dock.c
+++ b/drivers/acpi/dock.c
@@ -43,7 +43,7 @@ MODULE_AUTHOR("Kristen Carlson Accardi");
 MODULE_DESCRIPTION(ACPI_DOCK_DRIVER_DESCRIPTION);
 MODULE_LICENSE("GPL");
 
-static int immediate_undock = 1;
+static bool immediate_undock = 1;
 module_param(immediate_undock, bool, 0644);
 MODULE_PARM_DESC(immediate_undock, "1 (default) will cause the driver to "
 	"undock immediately when the undock button is pressed, 0 will cause"
diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c
index 07f7fea8a4e2..e50e31a518af 100644
--- a/drivers/acpi/pci_slot.c
+++ b/drivers/acpi/pci_slot.c
@@ -34,7 +34,7 @@
 #include <acpi/acpi_drivers.h>
 #include <linux/dmi.h>
 
-static int debug;
+static bool debug;
 static int check_sta_before_sun;
 
 #define DRIVER_VERSION 	"0.1"
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 08a44b532f7c..eaef02afc7cf 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -69,21 +69,21 @@ MODULE_AUTHOR("Bruno Ducrot");
 MODULE_DESCRIPTION("ACPI Video Driver");
 MODULE_LICENSE("GPL");
 
-static int brightness_switch_enabled = 1;
+static bool brightness_switch_enabled = 1;
 module_param(brightness_switch_enabled, bool, 0644);
 
 /*
  * By default, we don't allow duplicate ACPI video bus devices
  * under the same VGA controller
  */
-static int allow_duplicates;
+static bool allow_duplicates;
 module_param(allow_duplicates, bool, 0644);
 
 /*
  * Some BIOSes claim they use minimum backlight at boot,
  * and this may bring dimming screen after boot
  */
-static int use_bios_initial_backlight = 1;
+static bool use_bios_initial_backlight = 1;
 module_param(use_bios_initial_backlight, bool, 0644);
 
 static int register_count = 0;
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index e0bc9646a38e..55d6179dde58 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -599,9 +599,9 @@ MODULE_LICENSE("GPL");
 MODULE_DEVICE_TABLE(pci, nv_pci_tbl);
 MODULE_VERSION(DRV_VERSION);
 
-static int adma_enabled;
-static int swncq_enabled = 1;
-static int msi_enabled;
+static bool adma_enabled;
+static bool swncq_enabled = 1;
+static bool msi_enabled;
 
 static void nv_adma_register_mode(struct ata_port *ap)
 {
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index 1e9140626a83..e7e610aa9a7a 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -417,7 +417,7 @@ static struct ata_port_operations sil24_ops = {
 #endif
 };
 
-static int sata_sil24_msi;    /* Disable MSI */
+static bool sata_sil24_msi;    /* Disable MSI */
 module_param_named(msi, sata_sil24_msi, bool, S_IRUGO);
 MODULE_PARM_DESC(msi, "Enable MSI (Default: false)");
 
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 9a51df4f5b74..b182c2f7d777 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -112,12 +112,12 @@ static u8 read_prom_byte(struct he_dev *he_dev, int addr);
 /* globals */
 
 static struct he_dev *he_devs;
-static int disable64;
+static bool disable64;
 static short nvpibits = -1;
 static short nvcibits = -1;
 static short rx_skb_reserve = 16;
-static int irq_coalesce = 1;
-static int sdh = 0;
+static bool irq_coalesce = 1;
+static bool sdh = 0;
 
 /* Read from EEPROM = 0000 0011b */
 static unsigned int readtab[] = {
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 9cf20355ceec..8d680562ba73 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -59,8 +59,8 @@
 
 /* module parameter, defined in drbd_main.c */
 extern unsigned int minor_count;
-extern int disable_sendpage;
-extern int allow_oos;
+extern bool disable_sendpage;
+extern bool allow_oos;
 extern unsigned int cn_idx;
 
 #ifdef CONFIG_DRBD_FAULT_INJECTION
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 0358e55356c8..211fc44f84be 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -117,8 +117,8 @@ module_param(fault_devs, int, 0644);
 
 /* module parameter, defined */
 unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
-int disable_sendpage;
-int allow_oos;
+bool disable_sendpage;
+bool allow_oos;
 unsigned int cn_idx = CN_IDX_DRBD;
 int proc_details;       /* Detail level in proc drbd*/
 
diff --git a/drivers/block/paride/bpck6.c b/drivers/block/paride/bpck6.c
index ad124525ac23..ec64e7f5d1ce 100644
--- a/drivers/block/paride/bpck6.c
+++ b/drivers/block/paride/bpck6.c
@@ -20,9 +20,6 @@
 */
 
 
-/* PARAMETERS */
-static int verbose; /* set this to 1 to see debugging messages and whatnot */
-
 #define BACKPACK_VERSION "2.0.2"
 
 #include <linux/module.h>
@@ -36,6 +33,8 @@ static int verbose; /* set this to 1 to see debugging messages and whatnot */
 #include "ppc6lnx.c"
 #include "paride.h"
 
+/* PARAMETERS */
+static bool verbose; /* set this to 1 to see debugging messages and whatnot */
  
 
 #define PPCSTRUCT(pi) ((Interface *)(pi->private))
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 869e7676d46f..831e3ac156e6 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -124,8 +124,9 @@
    by default.
 
 */
+#include <linux/types.h>
 
-static int verbose = 0;
+static bool verbose = 0;
 static int major = PD_MAJOR;
 static char *name = PD_NAME;
 static int cluster = 64;
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index f21b520ef419..ec8f9ed6326e 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -118,13 +118,15 @@
 #define PF_NAME		"pf"
 #define PF_UNITS	4
 
+#include <linux/types.h>
+
 /* Here are things one can override from the insmod command.
    Most are autoprobed by paride unless set here.  Verbose is off
    by default.
 
 */
 
-static int verbose = 0;
+static bool verbose = 0;
 static int major = PF_MAJOR;
 static char *name = PF_NAME;
 static int cluster = 64;
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index a79fb4f7ff62..4a27b1de5fcb 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -130,13 +130,14 @@
 #define PI_PG	4
 #endif
 
+#include <linux/types.h>
 /* Here are things one can override from the insmod command.
    Most are autoprobed by paride unless set here.  Verbose is 0
    by default.
 
 */
 
-static int verbose = 0;
+static bool verbose = 0;
 static int major = PG_MAJOR;
 static char *name = PG_NAME;
 static int disable = 0;
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index 7179f79d7468..2596042eb987 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -109,13 +109,15 @@
 #define PT_NAME		"pt"
 #define PT_UNITS	4
 
+#include <linux/types.h>
+
 /* Here are things one can override from the insmod command.
    Most are autoprobed by paride unless set here.  Verbose is on
    by default.
 
 */
 
-static int verbose = 0;
+static bool verbose = 0;
 static int major = PT_MAJOR;
 static char *name = PT_NAME;
 static int disable = 0;
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 4abd2bcd20fb..51a972704db5 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -148,7 +148,7 @@ static volatile int xdc_busy;
 static struct timer_list xd_watchdog_int;
 
 static volatile u_char xd_error;
-static int nodma = XD_DONT_USE_DMA;
+static bool nodma = XD_DONT_USE_DMA;
 
 static struct request_queue *xd_queue;
 
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 55ac349695c4..f00f596c1029 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -37,13 +37,13 @@
 
 #define VERSION "0.6"
 
-static int ignore_dga;
-static int ignore_csr;
-static int ignore_sniffer;
-static int disable_scofix;
-static int force_scofix;
+static bool ignore_dga;
+static bool ignore_csr;
+static bool ignore_sniffer;
+static bool disable_scofix;
+static bool force_scofix;
 
-static int reset = 1;
+static bool reset = 1;
 
 static struct usb_driver btusb_driver;
 
diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c
index 9c5b2dc38e29..a767d4de45a4 100644
--- a/drivers/bluetooth/hci_bcsp.c
+++ b/drivers/bluetooth/hci_bcsp.c
@@ -49,8 +49,8 @@
 
 #define VERSION "0.3"
 
-static int txcrc = 1;
-static int hciextn = 1;
+static bool txcrc = 1;
+static bool hciextn = 1;
 
 #define BCSP_TXWINSIZE	4
 
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 48ad2a7ab080..07114489994f 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -48,7 +48,7 @@
 
 #define VERSION "2.2"
 
-static int reset = 0;
+static bool reset = 0;
 
 static struct hci_uart_proto *hup[HCI_UART_MAX_PROTO];
 
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 2118211aff99..1bbf7645a97c 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -285,17 +285,17 @@
 #include <asm/uaccess.h>
 
 /* used to tell the module to turn on full debugging messages */
-static int debug;
+static bool debug;
 /* used to keep tray locked at all times */
 static int keeplocked;
 /* default compatibility mode */
-static int autoclose=1;
-static int autoeject;
-static int lockdoor = 1;
+static bool autoclose=1;
+static bool autoeject;
+static bool lockdoor = 1;
 /* will we ever get to use this... sigh. */
-static int check_media_type;
+static bool check_media_type;
 /* automatically restart mrw format */
-static int mrw_format_restart = 1;
+static bool mrw_format_restart = 1;
 module_param(debug, bool, 0);
 module_param(autoclose, bool, 0);
 module_param(autoeject, bool, 0);
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 780498d76581..444f8b6ab411 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -33,7 +33,7 @@
 #define ULI_X86_64_ENU_SCR_REG		0x54
 
 static struct resource *aperture_resource;
-static int __initdata agp_try_unsupported = 1;
+static bool __initdata agp_try_unsupported = 1;
 static int agp_bridges_found;
 
 static void amd64_tlbflush(struct agp_memory *temp)
diff --git a/drivers/char/agp/sis-agp.c b/drivers/char/agp/sis-agp.c
index 29aacd81de78..08704ae53956 100644
--- a/drivers/char/agp/sis-agp.c
+++ b/drivers/char/agp/sis-agp.c
@@ -17,7 +17,7 @@
 #define PCI_DEVICE_ID_SI_662	0x0662
 #define PCI_DEVICE_ID_SI_671	0x0671
 
-static int __devinitdata agp_sis_force_delay = 0;
+static bool __devinitdata agp_sis_force_delay = 0;
 static int __devinitdata agp_sis_agp_spec = -1;
 
 static int sis_fetch_size(void)
diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c
index 6e40072fbf67..40cc0cf2ded6 100644
--- a/drivers/char/i8k.c
+++ b/drivers/char/i8k.c
@@ -69,19 +69,19 @@ MODULE_AUTHOR("Massimo Dal Zotto (dz@debian.org)");
 MODULE_DESCRIPTION("Driver for accessing SMM BIOS on Dell laptops");
 MODULE_LICENSE("GPL");
 
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Force loading without checking for supported models");
 
-static int ignore_dmi;
+static bool ignore_dmi;
 module_param(ignore_dmi, bool, 0);
 MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match");
 
-static int restricted;
+static bool restricted;
 module_param(restricted, bool, 0);
 MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set");
 
-static int power_status;
+static bool power_status;
 module_param(power_status, bool, 0600);
 MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k");
 
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 9397ab49b72e..50fcf9c04569 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1227,7 +1227,7 @@ static int smi_num; /* Used to sequence the SMIs */
 #define DEFAULT_REGSPACING	1
 #define DEFAULT_REGSIZE		1
 
-static int           si_trydefaults = 1;
+static bool          si_trydefaults = 1;
 static char          *si_type[SI_MAX_PARMS];
 #define MAX_SI_TYPE_STR 30
 static char          si_type_str[MAX_SI_TYPE_STR];
diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index 97c3edb95ae7..f43485607063 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -829,7 +829,7 @@ static struct console lpcons = {
 
 static int parport_nr[LP_NO] = { [0 ... LP_NO-1] = LP_PARPORT_UNSPEC };
 static char *parport[LP_NO];
-static int reset;
+static bool reset;
 
 module_param_array(parport, charp, NULL, 0);
 module_param(reset, bool, 0);
diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c
index a12f52400dbc..bf586ae1ee83 100644
--- a/drivers/char/nwflash.c
+++ b/drivers/char/nwflash.c
@@ -51,7 +51,7 @@ static int write_block(unsigned long p, const char __user *buf, int count);
 #define KFLASH_ID	0x89A6		//Intel flash
 #define KFLASH_ID4	0xB0D4		//Intel flash 4Meg
 
-static int flashdebug;		//if set - we will display progress msgs
+static bool flashdebug;		//if set - we will display progress msgs
 
 static int gbWriteEnable;
 static int gbWriteBase64Enable;
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 15781396af25..07f6a5abe372 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -439,7 +439,7 @@ static int mgslpc_device_count = 0;
  * .text section address and breakpoint on module load.
  * This is useful for use with gdb and add-symbol-file command.
  */
-static int break_on_load=0;
+static bool break_on_load=0;
 
 /*
  * Driver major number, defaults to zero to get auto
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 85da8740586b..732215b805c1 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -387,7 +387,7 @@ static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
 static struct fasync_struct *fasync;
 
 #if 0
-static int debug;
+static bool debug;
 module_param(debug, bool, 0644);
 #define DEBUG_ENT(fmt, arg...) do { \
 	if (debug) \
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 10cc44ceb5d1..a1748621111b 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -255,7 +255,7 @@ out:
 	return size;
 }
 
-static int itpm;
+static bool itpm;
 module_param(itpm, bool, 0444);
 MODULE_PARM_DESC(itpm, "Force iTPM workarounds (found on some Lenovo laptops)");
 
@@ -500,7 +500,7 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int interrupts = 1;
+static bool interrupts = 1;
 module_param(interrupts, bool, 0444);
 MODULE_PARM_DESC(interrupts, "Enable interrupts");
 
@@ -828,7 +828,7 @@ static struct platform_driver tis_drv = {
 
 static struct platform_device *pdev;
 
-static int force;
+static bool force;
 module_param(force, bool, 0444);
 MODULE_PARM_DESC(force, "Force device probe rather than using ACPI entry");
 static int __init init_tis(void)
diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c
index b153674431f1..e294e1b3616c 100644
--- a/drivers/edac/r82600_edac.c
+++ b/drivers/edac/r82600_edac.c
@@ -131,7 +131,7 @@ struct r82600_error_info {
 	u32 eapr;
 };
 
-static unsigned int disable_hardware_scrub;
+static bool disable_hardware_scrub;
 
 static struct edac_pci_ctl_info *r82600_pci;
 
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index 68375bc3aef6..80e95aa3bf14 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -66,7 +66,7 @@
  *
  * Concurrent logins are useful together with cluster filesystems.
  */
-static int sbp2_param_exclusive_login = 1;
+static bool sbp2_param_exclusive_login = 1;
 module_param_named(exclusive_login, sbp2_param_exclusive_login, bool, 0644);
 MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device "
 		 "(default = Y, use N for concurrent initiators)");
diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c
index f779009104eb..b71b77ab0dc7 100644
--- a/drivers/hid/hid-prodikeys.c
+++ b/drivers/hid/hid-prodikeys.c
@@ -90,7 +90,7 @@ static const char longname[] = "Prodikeys PC-MIDI Keyboard";
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
-static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
+static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
 
 module_param_array(index, int, NULL, 0444);
 module_param_array(id, charp, NULL, 0444);
diff --git a/drivers/hwmon/abituguru.c b/drivers/hwmon/abituguru.c
index 65a35cf5b3c5..3b728e8f169b 100644
--- a/drivers/hwmon/abituguru.c
+++ b/drivers/hwmon/abituguru.c
@@ -145,7 +145,7 @@ static const u8 abituguru_pwm_max[5] = { 0, 255, 255, 75, 75 };
 
 
 /* Insmod parameters */
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Set to one to force detection.");
 static int bank1_types[ABIT_UGURU_MAX_BANK1_SENSORS] = { -1, -1, -1, -1, -1,
diff --git a/drivers/hwmon/abituguru3.c b/drivers/hwmon/abituguru3.c
index d30855a75786..34a14a77e008 100644
--- a/drivers/hwmon/abituguru3.c
+++ b/drivers/hwmon/abituguru3.c
@@ -603,11 +603,11 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = {
 
 
 /* Insmod parameters */
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Set to one to force detection.");
 /* Default verbose is 1, since this driver is still in the testing phase */
-static int verbose = 1;
+static bool verbose = 1;
 module_param(verbose, bool, 0644);
 MODULE_PARM_DESC(verbose, "Enable/disable verbose error reporting");
 
diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c
index 522860ab6ce8..554f046bcf20 100644
--- a/drivers/hwmon/acpi_power_meter.c
+++ b/drivers/hwmon/acpi_power_meter.c
@@ -58,7 +58,7 @@ ACPI_MODULE_NAME(ACPI_POWER_METER_NAME);
 #define POWER_ALARM_NAME	"power1_alarm"
 
 static int cap_in_hardware;
-static int force_cap_on;
+static bool force_cap_on;
 
 static int can_cap_in_hardware(void)
 {
diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c
index 1ad0a885c5a5..0158cc35cb2e 100644
--- a/drivers/hwmon/adm1021.c
+++ b/drivers/hwmon/adm1021.c
@@ -103,7 +103,7 @@ static int adm1021_remove(struct i2c_client *client);
 static struct adm1021_data *adm1021_update_device(struct device *dev);
 
 /* (amalysh) read only mode, otherwise any limit's writing confuse BIOS */
-static int read_only;
+static bool read_only;
 
 
 static const struct i2c_device_id adm1021_id[] = {
diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c
index cfcc3b6fb6bf..ed60242d6a0a 100644
--- a/drivers/hwmon/ads7828.c
+++ b/drivers/hwmon/ads7828.c
@@ -48,8 +48,8 @@ static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b,
 	I2C_CLIENT_END };
 
 /* Module parameters */
-static int se_input = 1; /* Default is SE, 0 == diff */
-static int int_vref = 1; /* Default is internal ref ON */
+static bool se_input = 1; /* Default is SE, 0 == diff */
+static bool int_vref = 1; /* Default is internal ref ON */
 static int vref_mv = ADS7828_INT_VREF_MV; /* set if vref != 2.5V */
 module_param(se_input, bool, S_IRUGO);
 module_param(int_vref, bool, S_IRUGO);
diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c
index d9803958e49f..ffb229af7861 100644
--- a/drivers/hwmon/dme1737.c
+++ b/drivers/hwmon/dme1737.c
@@ -45,7 +45,7 @@
 static struct platform_device *pdev;
 
 /* Module load parameters */
-static int force_start;
+static bool force_start;
 module_param(force_start, bool, 0);
 MODULE_PARM_DESC(force_start, "Force the chip to start monitoring inputs");
 
@@ -53,7 +53,7 @@ static unsigned short force_id;
 module_param(force_id, ushort, 0);
 MODULE_PARM_DESC(force_id, "Override the detected device ID");
 
-static int probe_all_addr;
+static bool probe_all_addr;
 module_param(probe_all_addr, bool, 0);
 MODULE_PARM_DESC(probe_all_addr, "Include probing of non-standard LPC "
 		 "addresses");
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index 38c0b87676de..603ef2af2707 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -146,10 +146,10 @@ static inline void superio_exit(void)
 #define IT87_SIO_BEEP_PIN_REG	0xf6	/* Beep pin mapping */
 
 /* Update battery voltage after every reading if true */
-static int update_vbat;
+static bool update_vbat;
 
 /* Not all BIOSes properly configure the PWM registers */
-static int fix_pwm_polarity;
+static bool fix_pwm_polarity;
 
 /* Many IT87 constants specified below */
 
diff --git a/drivers/hwmon/lm93.c b/drivers/hwmon/lm93.c
index 3b43df418613..8bd6c5c9e05b 100644
--- a/drivers/hwmon/lm93.c
+++ b/drivers/hwmon/lm93.c
@@ -151,12 +151,12 @@ static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
 
 /* Insmod parameters */
 
-static int disable_block;
+static bool disable_block;
 module_param(disable_block, bool, 0);
 MODULE_PARM_DESC(disable_block,
 	"Set to non-zero to disable SMBus block data transactions.");
 
-static int init;
+static bool init;
 module_param(init, bool, 0);
 MODULE_PARM_DESC(init, "Set to non-zero to force chip initialization.");
 
diff --git a/drivers/hwmon/max1668.c b/drivers/hwmon/max1668.c
index 6914195cfd35..88953f99e914 100644
--- a/drivers/hwmon/max1668.c
+++ b/drivers/hwmon/max1668.c
@@ -59,7 +59,7 @@ static unsigned short max1668_addr_list[] = {
 #define DEV_ID_MAX1989		0xb
 
 /* read only mode module parameter */
-static int read_only;
+static bool read_only;
 module_param(read_only, bool, 0);
 MODULE_PARM_DESC(read_only, "Don't set any values, read only mode");
 
diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c
index bde50e34d013..374118f2b9f9 100644
--- a/drivers/hwmon/w83627hf.c
+++ b/drivers/hwmon/w83627hf.c
@@ -71,7 +71,7 @@ module_param(force_i2c, byte, 0);
 MODULE_PARM_DESC(force_i2c,
 		 "Initialize the i2c address of the sensors");
 
-static int init = 1;
+static bool init = 1;
 module_param(init, bool, 0);
 MODULE_PARM_DESC(init, "Set to zero to bypass chip initialization");
 
diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c
index 65b685e2c7b7..17a8fa2d9ae9 100644
--- a/drivers/hwmon/w83781d.c
+++ b/drivers/hwmon/w83781d.c
@@ -67,11 +67,11 @@ module_param_array(force_subclients, short, NULL, 0);
 MODULE_PARM_DESC(force_subclients, "List of subclient addresses: "
 		    "{bus, clientaddr, subclientaddr1, subclientaddr2}");
 
-static int reset;
+static bool reset;
 module_param(reset, bool, 0);
 MODULE_PARM_DESC(reset, "Set to one to reset chip on load");
 
-static int init = 1;
+static bool init = 1;
 module_param(init, bool, 0);
 MODULE_PARM_DESC(init, "Set to zero to bypass chip initialization");
 
diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c
index 6e5d0ae594b0..35aa5149307a 100644
--- a/drivers/hwmon/w83791d.c
+++ b/drivers/hwmon/w83791d.c
@@ -58,11 +58,11 @@ module_param_array(force_subclients, short, NULL, 0);
 MODULE_PARM_DESC(force_subclients, "List of subclient addresses: "
 			"{bus, clientaddr, subclientaddr1, subclientaddr2}");
 
-static int reset;
+static bool reset;
 module_param(reset, bool, 0);
 MODULE_PARM_DESC(reset, "Set to one to force a hardware chip reset");
 
-static int init;
+static bool init;
 module_param(init, bool, 0);
 MODULE_PARM_DESC(init, "Set to one to force extra software initialization");
 
diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c
index 9ded133e43f0..d3100eab6b2f 100644
--- a/drivers/hwmon/w83792d.c
+++ b/drivers/hwmon/w83792d.c
@@ -56,7 +56,7 @@ module_param_array(force_subclients, short, NULL, 0);
 MODULE_PARM_DESC(force_subclients, "List of subclient addresses: "
 			"{bus, clientaddr, subclientaddr1, subclientaddr2}");
 
-static int init;
+static bool init;
 module_param(init, bool, 0);
 MODULE_PARM_DESC(init, "Set to one to force chip initialization");
 
diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c
index 3cc6fef22087..45ec7e7c3c27 100644
--- a/drivers/hwmon/w83793.c
+++ b/drivers/hwmon/w83793.c
@@ -61,7 +61,7 @@ module_param_array(force_subclients, short, NULL, 0);
 MODULE_PARM_DESC(force_subclients, "List of subclient addresses: "
 		       "{bus, clientaddr, subclientaddr1, subclientaddr2}");
 
-static int reset;
+static bool reset;
 module_param(reset, bool, 0);
 MODULE_PARM_DESC(reset, "Set to 1 to reset chip, not recommended");
 
diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c
index 3ee398d0e4c9..aa58b25565bc 100644
--- a/drivers/hwmon/w83795.c
+++ b/drivers/hwmon/w83795.c
@@ -42,7 +42,7 @@ static const unsigned short normal_i2c[] = {
 };
 
 
-static int reset;
+static bool reset;
 module_param(reset, bool, 0);
 MODULE_PARM_DESC(reset, "Set to 1 to reset chip, not recommended");
 
diff --git a/drivers/hwmon/w83l786ng.c b/drivers/hwmon/w83l786ng.c
index 0254e181893d..063bd9508d8a 100644
--- a/drivers/hwmon/w83l786ng.c
+++ b/drivers/hwmon/w83l786ng.c
@@ -39,7 +39,7 @@ static const unsigned short normal_i2c[] = { 0x2e, 0x2f, I2C_CLIENT_END };
 
 /* Insmod parameters */
 
-static int reset;
+static bool reset;
 module_param(reset, bool, 0);
 MODULE_PARM_DESC(reset, "Set to 1 to reset chip, not recommended");
 
diff --git a/drivers/i2c/busses/i2c-highlander.c b/drivers/i2c/busses/i2c-highlander.c
index 63bb1cc2a042..fa88868cb556 100644
--- a/drivers/i2c/busses/i2c-highlander.c
+++ b/drivers/i2c/busses/i2c-highlander.c
@@ -52,7 +52,7 @@ struct highlander_i2c_dev {
 	size_t			buf_len;
 };
 
-static int iic_force_poll, iic_force_normal;
+static bool iic_force_poll, iic_force_normal;
 static int iic_timeout = 1000, iic_read_delay;
 
 static inline void highlander_i2c_irq_enable(struct highlander_i2c_dev *dev)
diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c
index 3c110fbc409b..c08ceb957aa7 100644
--- a/drivers/i2c/busses/i2c-ibm_iic.c
+++ b/drivers/i2c/busses/i2c-ibm_iic.c
@@ -51,11 +51,11 @@
 MODULE_DESCRIPTION("IBM IIC driver v" DRIVER_VERSION);
 MODULE_LICENSE("GPL");
 
-static int iic_force_poll;
+static bool iic_force_poll;
 module_param(iic_force_poll, bool, 0);
 MODULE_PARM_DESC(iic_force_poll, "Force polling mode");
 
-static int iic_force_fast;
+static bool iic_force_fast;
 module_param(iic_force_fast, bool, 0);
 MODULE_PARM_DESC(iic_force_fast, "Force fast mode (400 kHz)");
 
diff --git a/drivers/i2c/busses/i2c-sis630.c b/drivers/i2c/busses/i2c-sis630.c
index e6f539e26f65..58893772c3d8 100644
--- a/drivers/i2c/busses/i2c-sis630.c
+++ b/drivers/i2c/busses/i2c-sis630.c
@@ -93,8 +93,8 @@
 static struct pci_driver sis630_driver;
 
 /* insmod parameters */
-static int high_clock;
-static int force;
+static bool high_clock;
+static bool force;
 module_param(high_clock, bool, 0);
 MODULE_PARM_DESC(high_clock, "Set Host Master Clock to 56KHz (default 14KHz).");
 module_param(force, bool, 0);
diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c
index 0b012f1f8ac5..2a62c998044a 100644
--- a/drivers/i2c/busses/i2c-viapro.c
+++ b/drivers/i2c/busses/i2c-viapro.c
@@ -91,7 +91,7 @@ static unsigned short SMBHSTCFG = 0xD2;
 
 /* If force is set to anything different from 0, we forcibly enable the
    VT596. DANGEROUS! */
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Forcibly enable the SMBus. DANGEROUS!");
 
diff --git a/drivers/ide/ali14xx.c b/drivers/ide/ali14xx.c
index 25b9fe3a9f8e..d3be99fb4154 100644
--- a/drivers/ide/ali14xx.c
+++ b/drivers/ide/ali14xx.c
@@ -221,7 +221,7 @@ static int __init ali14xx_probe(void)
 	return ide_legacy_device_add(&ali14xx_port_info, 0);
 }
 
-static int probe_ali14xx;
+static bool probe_ali14xx;
 
 module_param_named(probe, probe_ali14xx, bool, 0);
 MODULE_PARM_DESC(probe, "probe for ALI M14xx chipsets");
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
index a81bd7575792..14717304b388 100644
--- a/drivers/ide/cmd640.c
+++ b/drivers/ide/cmd640.c
@@ -111,7 +111,7 @@
 
 #define DRV_NAME "cmd640"
 
-static int cmd640_vlb;
+static bool cmd640_vlb;
 
 /*
  * CMD640 specific registers definition.
diff --git a/drivers/ide/dtc2278.c b/drivers/ide/dtc2278.c
index 6929f7fce93a..46af4743b3e6 100644
--- a/drivers/ide/dtc2278.c
+++ b/drivers/ide/dtc2278.c
@@ -130,7 +130,7 @@ static int __init dtc2278_probe(void)
 	return ide_legacy_device_add(&dtc2278_port_info, 0);
 }
 
-static int probe_dtc2278;
+static bool probe_dtc2278;
 
 module_param_named(probe, probe_dtc2278, bool, 0);
 MODULE_PARM_DESC(probe, "probe for DTC2278xx chipsets");
diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c
index 3feaa26410be..51beb85250d4 100644
--- a/drivers/ide/gayle.c
+++ b/drivers/ide/gayle.c
@@ -50,7 +50,7 @@
 					       GAYLE_NUM_HWIFS-1)
 #define GAYLE_HAS_CONTROL_REG	(!ide_doubler)
 
-static int ide_doubler;
+static bool ide_doubler;
 module_param_named(doubler, ide_doubler, bool, 0);
 MODULE_PARM_DESC(doubler, "enable support for IDE doublers");
 
diff --git a/drivers/ide/ht6560b.c b/drivers/ide/ht6560b.c
index 808bcdcbf8e1..986f2513eab4 100644
--- a/drivers/ide/ht6560b.c
+++ b/drivers/ide/ht6560b.c
@@ -317,7 +317,7 @@ static void __init ht6560b_init_dev(ide_drive_t *drive)
 	ide_set_drivedata(drive, (void *)t);
 }
 
-static int probe_ht6560b;
+static bool probe_ht6560b;
 
 module_param_named(probe, probe_ht6560b, bool, 0);
 MODULE_PARM_DESC(probe, "probe for HT6560B chipset");
diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c
index 979d342c338a..547d7cf2e016 100644
--- a/drivers/ide/ide-4drives.c
+++ b/drivers/ide/ide-4drives.c
@@ -6,7 +6,7 @@
 
 #define DRV_NAME "ide-4drives"
 
-static int probe_4drives;
+static bool probe_4drives;
 
 module_param_named(probe, probe_4drives, bool, 0);
 MODULE_PARM_DESC(probe, "probe for generic IDE chipset with 4 drives/port");
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index f22edc66b030..f1a6796b165c 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -53,15 +53,15 @@ struct ide_acpi_hwif_link {
 #define DEBPRINT(fmt, args...)	do {} while (0)
 #endif	/* DEBUGGING */
 
-static int ide_noacpi;
+static bool ide_noacpi;
 module_param_named(noacpi, ide_noacpi, bool, 0);
 MODULE_PARM_DESC(noacpi, "disable IDE ACPI support");
 
-static int ide_acpigtf;
+static bool ide_acpigtf;
 module_param_named(acpigtf, ide_acpigtf, bool, 0);
 MODULE_PARM_DESC(acpigtf, "enable IDE ACPI _GTF support");
 
-static int ide_acpionboot;
+static bool ide_acpionboot;
 module_param_named(acpionboot, ide_acpionboot, bool, 0);
 MODULE_PARM_DESC(acpionboot, "call IDE ACPI methods on boot");
 
diff --git a/drivers/ide/ide-pci-generic.c b/drivers/ide/ide-pci-generic.c
index a743e68a8903..7f56b738d762 100644
--- a/drivers/ide/ide-pci-generic.c
+++ b/drivers/ide/ide-pci-generic.c
@@ -28,7 +28,7 @@
 
 #define DRV_NAME "ide_pci_generic"
 
-static int ide_generic_all;		/* Set to claim all devices */
+static bool ide_generic_all;		/* Set to claim all devices */
 
 module_param_named(all_generic_ide, ide_generic_all, bool, 0444);
 MODULE_PARM_DESC(all_generic_ide, "IDE generic will claim all unknown PCI IDE storage controllers.");
diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c
index 3f0244fd8e62..8bbfe5557c7b 100644
--- a/drivers/ide/qd65xx.c
+++ b/drivers/ide/qd65xx.c
@@ -417,7 +417,7 @@ static int __init qd_probe(int base)
 	return rc;
 }
 
-static int probe_qd65xx;
+static bool probe_qd65xx;
 
 module_param_named(probe, probe_qd65xx, bool, 0);
 MODULE_PARM_DESC(probe, "probe for QD65xx chipsets");
diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c
index 47adcd09cb26..5cfb78120669 100644
--- a/drivers/ide/umc8672.c
+++ b/drivers/ide/umc8672.c
@@ -160,7 +160,7 @@ static int __init umc8672_probe(void)
 	return ide_legacy_device_add(&umc8672_port_info, 0);
 }
 
-static int probe_umc8672;
+static bool probe_umc8672;
 
 module_param_named(probe, probe_umc8672, bool, 0);
 MODULE_PARM_DESC(probe, "probe for UMC8672 chipset");
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index aaf6023a4835..f08f6eaf3fa8 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -379,8 +379,8 @@ extern spinlock_t shca_list_lock;
 
 extern int ehca_static_rate;
 extern int ehca_port_act_time;
-extern int ehca_use_hp_mr;
-extern int ehca_scaling_code;
+extern bool ehca_use_hp_mr;
+extern bool ehca_scaling_code;
 extern int ehca_lock_hcalls;
 extern int ehca_nr_ports;
 extern int ehca_max_cq;
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 8af8d4f7bdb1..832e7a7d0aee 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -59,16 +59,16 @@ MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
 MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
 MODULE_VERSION(HCAD_VERSION);
 
-static int ehca_open_aqp1     = 0;
+static bool ehca_open_aqp1    = 0;
 static int ehca_hw_level      = 0;
-static int ehca_poll_all_eqs  = 1;
+static bool ehca_poll_all_eqs = 1;
 
 int ehca_debug_level   = 0;
 int ehca_nr_ports      = -1;
-int ehca_use_hp_mr     = 0;
+bool ehca_use_hp_mr    = 0;
 int ehca_port_act_time = 30;
 int ehca_static_rate   = -1;
-int ehca_scaling_code  = 0;
+bool ehca_scaling_code = 0;
 int ehca_lock_hcalls   = -1;
 int ehca_max_cq        = -1;
 int ehca_max_qp        = -1;
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 5965b3df8f2f..7013da5e9eda 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -96,7 +96,7 @@ unsigned int wqm_quanta = 0x10000;
 module_param(wqm_quanta, int, 0644);
 MODULE_PARM_DESC(wqm_quanta, "WQM quanta");
 
-static unsigned int limit_maxrdreqsz;
+static bool limit_maxrdreqsz;
 module_param(limit_maxrdreqsz, bool, 0644);
 MODULE_PARM_DESC(limit_maxrdreqsz, "Limit max read request size to 256 Bytes");
 
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 32bbd4c77b7c..fd7a0d5bc94d 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -98,15 +98,15 @@
 #define XTYPE_XBOX360W    2
 #define XTYPE_UNKNOWN     3
 
-static int dpad_to_buttons;
+static bool dpad_to_buttons;
 module_param(dpad_to_buttons, bool, S_IRUGO);
 MODULE_PARM_DESC(dpad_to_buttons, "Map D-PAD to buttons rather than axes for unknown pads");
 
-static int triggers_to_buttons;
+static bool triggers_to_buttons;
 module_param(triggers_to_buttons, bool, S_IRUGO);
 MODULE_PARM_DESC(triggers_to_buttons, "Map triggers to buttons rather than axes for unknown pads");
 
-static int sticks_to_null;
+static bool sticks_to_null;
 module_param(sticks_to_null, bool, S_IRUGO);
 MODULE_PARM_DESC(sticks_to_null, "Do not map sticks at all for unknown pads");
 
diff --git a/drivers/input/misc/wistron_btns.c b/drivers/input/misc/wistron_btns.c
index 52b419348983..e2bdfd4bea70 100644
--- a/drivers/input/misc/wistron_btns.c
+++ b/drivers/input/misc/wistron_btns.c
@@ -48,7 +48,7 @@ MODULE_DESCRIPTION("Wistron laptop button driver");
 MODULE_LICENSE("GPL v2");
 MODULE_VERSION("0.3");
 
-static int force; /* = 0; */
+static bool force; /* = 0; */
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Load even if computer is not in database");
 
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index de7e8bc17b1f..e6c9931f02c7 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -60,7 +60,7 @@ static unsigned int psmouse_rate = 100;
 module_param_named(rate, psmouse_rate, uint, 0644);
 MODULE_PARM_DESC(rate, "Report rate, in reports per second.");
 
-static unsigned int psmouse_smartscroll = 1;
+static bool psmouse_smartscroll = 1;
 module_param_named(smartscroll, psmouse_smartscroll, bool, 0644);
 MODULE_PARM_DESC(smartscroll, "Logitech Smartscroll autorepeat, 1 = enabled (default), 0 = disabled.");
 
diff --git a/drivers/input/mouse/synaptics_i2c.c b/drivers/input/mouse/synaptics_i2c.c
index 4b755cb5b38c..1c58aafa523f 100644
--- a/drivers/input/mouse/synaptics_i2c.c
+++ b/drivers/input/mouse/synaptics_i2c.c
@@ -185,17 +185,17 @@
 #define NO_DATA_SLEEP_MSECS	(MSEC_PER_SEC / 4)
 
 /* Control touchpad's No Deceleration option */
-static int no_decel = 1;
+static bool no_decel = 1;
 module_param(no_decel, bool, 0644);
 MODULE_PARM_DESC(no_decel, "No Deceleration. Default = 1 (on)");
 
 /* Control touchpad's Reduced Reporting option */
-static int reduce_report;
+static bool reduce_report;
 module_param(reduce_report, bool, 0644);
 MODULE_PARM_DESC(reduce_report, "Reduced Reporting. Default = 0 (off)");
 
 /* Control touchpad's No Filter option */
-static int no_filter;
+static bool no_filter;
 module_param(no_filter, bool, 0644);
 MODULE_PARM_DESC(no_filter, "No Filter. Default = 0 (off)");
 
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c
index 979c443bf1ef..be3316073ae7 100644
--- a/drivers/input/serio/hp_sdc.c
+++ b/drivers/input/serio/hp_sdc.c
@@ -105,7 +105,7 @@ EXPORT_SYMBOL(__hp_sdc_enqueue_transaction);
 EXPORT_SYMBOL(hp_sdc_enqueue_transaction);
 EXPORT_SYMBOL(hp_sdc_dequeue_transaction);
 
-static unsigned int hp_sdc_disabled;
+static bool hp_sdc_disabled;
 module_param_named(no_hpsdc, hp_sdc_disabled, bool, 0);
 MODULE_PARM_DESC(no_hpsdc, "Do not enable HP SDC driver.");
 
diff --git a/drivers/input/touchscreen/eeti_ts.c b/drivers/input/touchscreen/eeti_ts.c
index 7f8f538a9806..1df19bb8534a 100644
--- a/drivers/input/touchscreen/eeti_ts.c
+++ b/drivers/input/touchscreen/eeti_ts.c
@@ -35,11 +35,11 @@
 #include <linux/input/eeti_ts.h>
 #include <linux/slab.h>
 
-static int flip_x;
+static bool flip_x;
 module_param(flip_x, bool, 0644);
 MODULE_PARM_DESC(flip_x, "flip x coordinate");
 
-static int flip_y;
+static bool flip_y;
 module_param(flip_y, bool, 0644);
 MODULE_PARM_DESC(flip_y, "flip y coordinate");
 
diff --git a/drivers/input/touchscreen/htcpen.c b/drivers/input/touchscreen/htcpen.c
index 81e338623944..d13143b68b3e 100644
--- a/drivers/input/touchscreen/htcpen.c
+++ b/drivers/input/touchscreen/htcpen.c
@@ -40,10 +40,10 @@ MODULE_LICENSE("GPL");
 #define X_AXIS_MAX		2040
 #define Y_AXIS_MAX		2040
 
-static int invert_x;
+static bool invert_x;
 module_param(invert_x, bool, 0644);
 MODULE_PARM_DESC(invert_x, "If set, X axis is inverted");
-static int invert_y;
+static bool invert_y;
 module_param(invert_y, bool, 0644);
 MODULE_PARM_DESC(invert_y, "If set, Y axis is inverted");
 
diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c
index d2b57536feea..46e83ad53f43 100644
--- a/drivers/input/touchscreen/ucb1400_ts.c
+++ b/drivers/input/touchscreen/ucb1400_ts.c
@@ -30,7 +30,7 @@
 
 #define UCB1400_TS_POLL_PERIOD	10 /* ms */
 
-static int adcsync;
+static bool adcsync;
 static int ts_delay = 55; /* us */
 static int ts_delay_pressure;	/* us */
 
diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c
index 06cef3ccc63a..3a5ebf452e81 100644
--- a/drivers/input/touchscreen/usbtouchscreen.c
+++ b/drivers/input/touchscreen/usbtouchscreen.c
@@ -60,11 +60,11 @@
 #define DRIVER_AUTHOR		"Daniel Ritz <daniel.ritz@gmx.ch>"
 #define DRIVER_DESC		"USB Touchscreen Driver"
 
-static int swap_xy;
+static bool swap_xy;
 module_param(swap_xy, bool, 0644);
 MODULE_PARM_DESC(swap_xy, "If set X and Y axes are swapped.");
 
-static int hwcalib_xy;
+static bool hwcalib_xy;
 module_param(hwcalib_xy, bool, 0644);
 MODULE_PARM_DESC(hwcalib_xy, "If set hw-calibrated X/Y are used if available");
 
diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c
index 9c8d7aa053c5..a0ed668d4d2a 100644
--- a/drivers/isdn/hardware/avm/b1dma.c
+++ b/drivers/isdn/hardware/avm/b1dma.c
@@ -40,7 +40,7 @@ MODULE_DESCRIPTION("CAPI4Linux: DMA support for active AVM cards");
 MODULE_AUTHOR("Carsten Paeth");
 MODULE_LICENSE("GPL");
 
-static int suppress_pollack = 0;
+static bool suppress_pollack = 0;
 module_param(suppress_pollack, bool, 0);
 
 /* ------------------------------------------------------------- */
diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index d3530f6e8115..9743b24ef9d6 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -40,7 +40,7 @@ static char *revision = "$Revision: 1.1.2.2 $";
 
 /* ------------------------------------------------------------- */
 
-static int suppress_pollack;
+static bool suppress_pollack;
 
 static struct pci_device_id c4_pci_tbl[] = {
 	{ PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21285, PCI_VENDOR_ID_AVM, PCI_DEVICE_ID_AVM_C4, 0, 0, (unsigned long)4 },
diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c
index ca710ab278ec..023de789f250 100644
--- a/drivers/isdn/sc/init.c
+++ b/drivers/isdn/sc/init.c
@@ -30,7 +30,7 @@ static const char *boardname[] = { "DataCommute/BRI", "DataCommute/PRI", "TeleCo
 static unsigned int io[] = {0,0,0,0};
 static unsigned char irq[] = {0,0,0,0};
 static unsigned long ram[] = {0,0,0,0};
-static int do_reset = 0;
+static bool do_reset = 0;
 
 module_param_array(io, int, NULL, 0);
 module_param_array(irq, int, NULL, 0);
diff --git a/drivers/leds/leds-clevo-mail.c b/drivers/leds/leds-clevo-mail.c
index a498135a4e80..1ed1677c916f 100644
--- a/drivers/leds/leds-clevo-mail.c
+++ b/drivers/leds/leds-clevo-mail.c
@@ -18,7 +18,7 @@ MODULE_AUTHOR("Márton Németh <nm127@freemail.hu>");
 MODULE_DESCRIPTION("Clevo mail LED driver");
 MODULE_LICENSE("GPL");
 
-static unsigned int __initdata nodetect;
+static bool __initdata nodetect;
 module_param_named(nodetect, nodetect, bool, 0);
 MODULE_PARM_DESC(nodetect, "Skip DMI hardware detection");
 
diff --git a/drivers/leds/leds-ss4200.c b/drivers/leds/leds-ss4200.c
index 614ebebaaa28..57371e1485ab 100644
--- a/drivers/leds/leds-ss4200.c
+++ b/drivers/leds/leds-ss4200.c
@@ -79,7 +79,7 @@ static int __init ss4200_led_dmi_callback(const struct dmi_system_id *id)
 	return 1;
 }
 
-static unsigned int __initdata nodetect;
+static bool __initdata nodetect;
 module_param_named(nodetect, nodetect, bool, 0);
 MODULE_PARM_DESC(nodetect, "Skip DMI-based hardware detection");
 
diff --git a/drivers/macintosh/ams/ams-core.c b/drivers/macintosh/ams/ams-core.c
index 399beb1638d1..5c6a2d876562 100644
--- a/drivers/macintosh/ams/ams-core.c
+++ b/drivers/macintosh/ams/ams-core.c
@@ -31,7 +31,7 @@
 /* There is only one motion sensor per machine */
 struct ams ams_info;
 
-static unsigned int verbose;
+static bool verbose;
 module_param(verbose, bool, 0644);
 MODULE_PARM_DESC(verbose, "Show free falls and shocks in kernel output");
 
diff --git a/drivers/macintosh/ams/ams-input.c b/drivers/macintosh/ams/ams-input.c
index 8a712392cd38..b27e530a87a4 100644
--- a/drivers/macintosh/ams/ams-input.c
+++ b/drivers/macintosh/ams/ams-input.c
@@ -19,11 +19,11 @@
 
 #include "ams.h"
 
-static unsigned int joystick;
+static bool joystick;
 module_param(joystick, bool, S_IRUGO);
 MODULE_PARM_DESC(joystick, "Enable the input class device on module load");
 
-static unsigned int invert;
+static bool invert;
 module_param(invert, bool, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(invert, "Invert input data on X and Y axis");
 
diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c
index 02367308ff2e..c60d025044ee 100644
--- a/drivers/macintosh/therm_adt746x.c
+++ b/drivers/macintosh/therm_adt746x.c
@@ -52,7 +52,7 @@ static const char *sensor_location[3];
 
 static int limit_adjust;
 static int fan_speed = -1;
-static int verbose;
+static bool verbose;
 
 MODULE_AUTHOR("Colin Leroy <colin@colino.net>");
 MODULE_DESCRIPTION("Driver for ADT746x thermostat in iBook G4 and "
diff --git a/drivers/media/dvb/dvb-usb/af9005.c b/drivers/media/dvb/dvb-usb/af9005.c
index bd51a764351b..4fc024d77040 100644
--- a/drivers/media/dvb/dvb-usb/af9005.c
+++ b/drivers/media/dvb/dvb-usb/af9005.c
@@ -30,7 +30,7 @@ MODULE_PARM_DESC(debug,
 		 "set debugging level (1=info,xfer=2,rc=4,reg=8,i2c=16,fw=32 (or-able))."
 		 DVB_USB_DEBUG_STATUS);
 /* enable obnoxious led */
-int dvb_usb_af9005_led = 1;
+bool dvb_usb_af9005_led = 1;
 module_param_named(led, dvb_usb_af9005_led, bool, 0644);
 MODULE_PARM_DESC(led, "enable led (default: 1).");
 
diff --git a/drivers/media/dvb/dvb-usb/af9005.h b/drivers/media/dvb/dvb-usb/af9005.h
index c71c77bd7f4b..6a2bf3de8456 100644
--- a/drivers/media/dvb/dvb-usb/af9005.h
+++ b/drivers/media/dvb/dvb-usb/af9005.h
@@ -35,7 +35,7 @@ extern int dvb_usb_af9005_debug;
 #define deb_i2c(args...)  dprintk(dvb_usb_af9005_debug,0x10,args)
 #define deb_fw(args...)   dprintk(dvb_usb_af9005_debug,0x20,args)
 
-extern int dvb_usb_af9005_led;
+extern bool dvb_usb_af9005_led;
 
 /* firmware */
 #define FW_BULKOUT_SIZE 250
diff --git a/drivers/media/radio/radio-gemtek.c b/drivers/media/radio/radio-gemtek.c
index edadc8449a3d..36ce0611c037 100644
--- a/drivers/media/radio/radio-gemtek.c
+++ b/drivers/media/radio/radio-gemtek.c
@@ -47,11 +47,11 @@ MODULE_VERSION("0.0.4");
 #endif
 
 static int io		= CONFIG_RADIO_GEMTEK_PORT;
-static int probe	= CONFIG_RADIO_GEMTEK_PROBE;
-static int hardmute;
-static int shutdown	= 1;
-static int keepmuted	= 1;
-static int initmute	= 1;
+static bool probe	= CONFIG_RADIO_GEMTEK_PROBE;
+static bool hardmute;
+static bool shutdown	= 1;
+static bool keepmuted	= 1;
+static bool initmute	= 1;
 static int radio_nr	= -1;
 
 module_param(io, int, 0444);
diff --git a/drivers/media/radio/radio-miropcm20.c b/drivers/media/radio/radio-miropcm20.c
index 3fb76e3834c9..87c1ee13b058 100644
--- a/drivers/media/radio/radio-miropcm20.c
+++ b/drivers/media/radio/radio-miropcm20.c
@@ -23,7 +23,7 @@ static int radio_nr = -1;
 module_param(radio_nr, int, 0);
 MODULE_PARM_DESC(radio_nr, "Set radio device number (/dev/radioX).  Default: -1 (autodetect)");
 
-static int mono;
+static bool mono;
 module_param(mono, bool, 0);
 MODULE_PARM_DESC(mono, "Force tuner into mono mode.");
 
diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c
index 27997a9ceb0d..ca12d3289bfe 100644
--- a/drivers/media/rc/lirc_dev.c
+++ b/drivers/media/rc/lirc_dev.c
@@ -38,7 +38,7 @@
 #include <media/lirc.h>
 #include <media/lirc_dev.h>
 
-static int debug;
+static bool debug;
 
 #define IRCTL_DEV_NAME	"BaseRemoteCtl"
 #define NOPLUG		-1
diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c
index 20bb12d6fbbe..21105bf9594d 100644
--- a/drivers/media/rc/mceusb.c
+++ b/drivers/media/rc/mceusb.c
@@ -156,9 +156,9 @@
 
 /* module parameters */
 #ifdef CONFIG_USB_DEBUG
-static int debug = 1;
+static bool debug = 1;
 #else
-static int debug;
+static bool debug;
 #endif
 
 #define mce_dbg(dev, fmt, ...)					\
diff --git a/drivers/media/rc/streamzap.c b/drivers/media/rc/streamzap.c
index b1d29d09eeae..d6f4bfe09391 100644
--- a/drivers/media/rc/streamzap.c
+++ b/drivers/media/rc/streamzap.c
@@ -43,9 +43,9 @@
 #define DRIVER_DESC	"Streamzap Remote Control driver"
 
 #ifdef CONFIG_USB_DEBUG
-static int debug = 1;
+static bool debug = 1;
 #else
-static int debug;
+static bool debug;
 #endif
 
 #define USB_STREAMZAP_VENDOR_ID		0x0e9c
diff --git a/drivers/media/rc/winbond-cir.c b/drivers/media/rc/winbond-cir.c
index e7f7a57bf684..b09c5fae489b 100644
--- a/drivers/media/rc/winbond-cir.c
+++ b/drivers/media/rc/winbond-cir.c
@@ -226,11 +226,11 @@ module_param(protocol, uint, 0444);
 MODULE_PARM_DESC(protocol, "IR protocol to use for the power-on command "
 		 "(0 = RC5, 1 = NEC, 2 = RC6A, default)");
 
-static int invert; /* default = 0 */
+static bool invert; /* default = 0 */
 module_param(invert, bool, 0444);
 MODULE_PARM_DESC(invert, "Invert the signal from the IR receiver");
 
-static int txandrx; /* default = 0 */
+static bool txandrx; /* default = 0 */
 module_param(txandrx, bool, 0444);
 MODULE_PARM_DESC(invert, "Allow simultaneous TX and RX");
 
diff --git a/drivers/media/video/c-qcam.c b/drivers/media/video/c-qcam.c
index cd8ff0473184..fda32f52554a 100644
--- a/drivers/media/video/c-qcam.c
+++ b/drivers/media/video/c-qcam.c
@@ -72,7 +72,7 @@ struct qcam {
 
 static int parport[MAX_CAMS] = { [1 ... MAX_CAMS-1] = -1 };
 static int probe = 2;
-static int force_rgb;
+static bool force_rgb;
 static int video_nr = -1;
 
 /* FIXME: parport=auto would never have worked, surely? --RR */
diff --git a/drivers/media/video/cs5345.c b/drivers/media/video/cs5345.c
index 5909f2557ab4..1d64af9adf71 100644
--- a/drivers/media/video/cs5345.c
+++ b/drivers/media/video/cs5345.c
@@ -31,7 +31,7 @@ MODULE_DESCRIPTION("i2c device driver for cs5345 Audio ADC");
 MODULE_AUTHOR("Hans Verkuil");
 MODULE_LICENSE("GPL");
 
-static int debug;
+static bool debug;
 
 module_param(debug, bool, 0644);
 
diff --git a/drivers/media/video/cs53l32a.c b/drivers/media/video/cs53l32a.c
index d93e5ab45fd3..51c5b9ad67d8 100644
--- a/drivers/media/video/cs53l32a.c
+++ b/drivers/media/video/cs53l32a.c
@@ -35,7 +35,7 @@ MODULE_DESCRIPTION("i2c device driver for cs53l32a Audio ADC");
 MODULE_AUTHOR("Martin Vaughan");
 MODULE_LICENSE("GPL");
 
-static int debug;
+static bool debug;
 
 module_param(debug, bool, 0644);
 
diff --git a/drivers/media/video/cx18/cx18-driver.c b/drivers/media/video/cx18/cx18-driver.c
index c6ff32a6137c..349bd9c2aff5 100644
--- a/drivers/media/video/cx18/cx18-driver.c
+++ b/drivers/media/video/cx18/cx18-driver.c
@@ -75,7 +75,7 @@ static int radio[CX18_MAX_CARDS] = { -1, -1, -1, -1, -1, -1, -1, -1,
 				     -1, -1, -1, -1, -1, -1, -1, -1 };
 static unsigned cardtype_c = 1;
 static unsigned tuner_c = 1;
-static unsigned radio_c = 1;
+static bool radio_c = 1;
 static char pal[] = "--";
 static char secam[] = "--";
 static char ntsc[] = "-";
diff --git a/drivers/media/video/cx25821/cx25821-alsa.c b/drivers/media/video/cx25821/cx25821-alsa.c
index 09e99de5fd21..58be4f3bb3cb 100644
--- a/drivers/media/video/cx25821/cx25821-alsa.c
+++ b/drivers/media/video/cx25821/cx25821-alsa.c
@@ -102,7 +102,7 @@ struct cx25821_audio_dev {
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;	/* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;	/* ID for this card */
-static int enable[SNDRV_CARDS] = { 1, [1 ... (SNDRV_CARDS - 1)] = 1 };
+static bool enable[SNDRV_CARDS] = { 1, [1 ... (SNDRV_CARDS - 1)] = 1 };
 
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable cx25821 soundcard. default enabled.");
diff --git a/drivers/media/video/cx88/cx88-alsa.c b/drivers/media/video/cx88/cx88-alsa.c
index 68d1240f493c..04bf6627d362 100644
--- a/drivers/media/video/cx88/cx88-alsa.c
+++ b/drivers/media/video/cx88/cx88-alsa.c
@@ -96,7 +96,7 @@ typedef struct cx88_audio_dev snd_cx88_card_t;
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;	/* Index 0-MAX */
 static const char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;	/* ID for this card */
-static int enable[SNDRV_CARDS] = {1, [1 ... (SNDRV_CARDS - 1)] = 1};
+static bool enable[SNDRV_CARDS] = {1, [1 ... (SNDRV_CARDS - 1)] = 1};
 
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable cx88x soundcard. default enabled.");
diff --git a/drivers/media/video/gspca/m5602/m5602_core.c b/drivers/media/video/gspca/m5602/m5602_core.c
index 9fe3816b2aa0..0c4493675438 100644
--- a/drivers/media/video/gspca/m5602/m5602_core.c
+++ b/drivers/media/video/gspca/m5602/m5602_core.c
@@ -27,8 +27,8 @@
 
 /* Kernel module parameters */
 int force_sensor;
-static int dump_bridge;
-int dump_sensor;
+static bool dump_bridge;
+bool dump_sensor;
 
 static const struct usb_device_id m5602_table[] = {
 	{USB_DEVICE(0x0402, 0x5602)},
diff --git a/drivers/media/video/gspca/m5602/m5602_mt9m111.h b/drivers/media/video/gspca/m5602/m5602_mt9m111.h
index b1f0c492036a..8c672b5c8c6a 100644
--- a/drivers/media/video/gspca/m5602/m5602_mt9m111.h
+++ b/drivers/media/video/gspca/m5602/m5602_mt9m111.h
@@ -106,7 +106,7 @@
 
 /* Kernel module parameters */
 extern int force_sensor;
-extern int dump_sensor;
+extern bool dump_sensor;
 
 int mt9m111_probe(struct sd *sd);
 int mt9m111_init(struct sd *sd);
diff --git a/drivers/media/video/gspca/m5602/m5602_ov7660.h b/drivers/media/video/gspca/m5602/m5602_ov7660.h
index 2efd607987ec..2b6a13b508f7 100644
--- a/drivers/media/video/gspca/m5602/m5602_ov7660.h
+++ b/drivers/media/video/gspca/m5602/m5602_ov7660.h
@@ -86,7 +86,7 @@
 
 /* Kernel module parameters */
 extern int force_sensor;
-extern int dump_sensor;
+extern bool dump_sensor;
 
 int ov7660_probe(struct sd *sd);
 int ov7660_init(struct sd *sd);
diff --git a/drivers/media/video/gspca/m5602/m5602_ov9650.h b/drivers/media/video/gspca/m5602/m5602_ov9650.h
index da9a129b739d..f7aa5bf68983 100644
--- a/drivers/media/video/gspca/m5602/m5602_ov9650.h
+++ b/drivers/media/video/gspca/m5602/m5602_ov9650.h
@@ -135,7 +135,7 @@
 
 /* Kernel module parameters */
 extern int force_sensor;
-extern int dump_sensor;
+extern bool dump_sensor;
 
 int ov9650_probe(struct sd *sd);
 int ov9650_init(struct sd *sd);
diff --git a/drivers/media/video/gspca/m5602/m5602_po1030.h b/drivers/media/video/gspca/m5602/m5602_po1030.h
index 338359596398..81a2bcb88fe3 100644
--- a/drivers/media/video/gspca/m5602/m5602_po1030.h
+++ b/drivers/media/video/gspca/m5602/m5602_po1030.h
@@ -147,7 +147,7 @@
 
 /* Kernel module parameters */
 extern int force_sensor;
-extern int dump_sensor;
+extern bool dump_sensor;
 
 int po1030_probe(struct sd *sd);
 int po1030_init(struct sd *sd);
diff --git a/drivers/media/video/gspca/m5602/m5602_s5k4aa.h b/drivers/media/video/gspca/m5602/m5602_s5k4aa.h
index 8cc7a3f6da72..8e0035e731c7 100644
--- a/drivers/media/video/gspca/m5602/m5602_s5k4aa.h
+++ b/drivers/media/video/gspca/m5602/m5602_s5k4aa.h
@@ -65,7 +65,7 @@
 
 /* Kernel module parameters */
 extern int force_sensor;
-extern int dump_sensor;
+extern bool dump_sensor;
 
 int s5k4aa_probe(struct sd *sd);
 int s5k4aa_init(struct sd *sd);
diff --git a/drivers/media/video/gspca/m5602/m5602_s5k83a.h b/drivers/media/video/gspca/m5602/m5602_s5k83a.h
index 80a63a236e24..79952247b534 100644
--- a/drivers/media/video/gspca/m5602/m5602_s5k83a.h
+++ b/drivers/media/video/gspca/m5602/m5602_s5k83a.h
@@ -41,7 +41,7 @@
 
 /* Kernel module parameters */
 extern int force_sensor;
-extern int dump_sensor;
+extern bool dump_sensor;
 
 int s5k83a_probe(struct sd *sd);
 int s5k83a_init(struct sd *sd);
diff --git a/drivers/media/video/gspca/stv06xx/stv06xx.c b/drivers/media/video/gspca/stv06xx/stv06xx.c
index 0ab425fbea9a..6f878f6c6e99 100644
--- a/drivers/media/video/gspca/stv06xx/stv06xx.c
+++ b/drivers/media/video/gspca/stv06xx/stv06xx.c
@@ -36,8 +36,8 @@ MODULE_AUTHOR("Erik Andrén");
 MODULE_DESCRIPTION("STV06XX USB Camera Driver");
 MODULE_LICENSE("GPL");
 
-static int dump_bridge;
-static int dump_sensor;
+static bool dump_bridge;
+static bool dump_sensor;
 
 int stv06xx_write_bridge(struct sd *sd, u16 address, u16 i2c_data)
 {
diff --git a/drivers/media/video/hdpvr/hdpvr-core.c b/drivers/media/video/hdpvr/hdpvr-core.c
index 3f1a5b1beeba..e5eb56a5b618 100644
--- a/drivers/media/video/hdpvr/hdpvr-core.c
+++ b/drivers/media/video/hdpvr/hdpvr-core.c
@@ -49,7 +49,7 @@ module_param(default_audio_input, uint, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(default_audio_input, "default audio input: 0=RCA back / "
 		 "1=RCA front / 2=S/PDIF");
 
-static int boost_audio;
+static bool boost_audio;
 module_param(boost_audio, bool, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(boost_audio, "boost the audio signal");
 
diff --git a/drivers/media/video/ivtv/ivtv-driver.c b/drivers/media/video/ivtv/ivtv-driver.c
index 41108a9a195e..544af91cbdc1 100644
--- a/drivers/media/video/ivtv/ivtv-driver.c
+++ b/drivers/media/video/ivtv/ivtv-driver.c
@@ -99,7 +99,7 @@ static int i2c_clock_period[IVTV_MAX_CARDS] = { -1, -1, -1, -1, -1, -1, -1, -1,
 
 static unsigned int cardtype_c = 1;
 static unsigned int tuner_c = 1;
-static unsigned int radio_c = 1;
+static bool radio_c = 1;
 static unsigned int i2c_clock_period_c = 1;
 static char pal[] = "---";
 static char secam[] = "--";
diff --git a/drivers/media/video/ivtv/ivtvfb.c b/drivers/media/video/ivtv/ivtvfb.c
index 6b7c9c823330..d0fbfcf7133d 100644
--- a/drivers/media/video/ivtv/ivtvfb.c
+++ b/drivers/media/video/ivtv/ivtvfb.c
@@ -58,7 +58,7 @@
 /* card parameters */
 static int ivtvfb_card_id = -1;
 static int ivtvfb_debug = 0;
-static int osd_laced;
+static bool osd_laced;
 static int osd_depth;
 static int osd_upper;
 static int osd_left;
diff --git a/drivers/media/video/marvell-ccic/mcam-core.c b/drivers/media/video/marvell-ccic/mcam-core.c
index 80ec64d2d6d8..2c8fc0f6d690 100644
--- a/drivers/media/video/marvell-ccic/mcam-core.c
+++ b/drivers/media/video/marvell-ccic/mcam-core.c
@@ -51,7 +51,7 @@ static int delivered;
  * sense.
  */
 
-static int alloc_bufs_at_read;
+static bool alloc_bufs_at_read;
 module_param(alloc_bufs_at_read, bool, 0444);
 MODULE_PARM_DESC(alloc_bufs_at_read,
 		"Non-zero value causes DMA buffers to be allocated when the "
@@ -73,11 +73,11 @@ MODULE_PARM_DESC(dma_buf_size,
 		"parameters require larger buffers, an attempt to reallocate "
 		"will be made.");
 #else /* MCAM_MODE_VMALLOC */
-static const int alloc_bufs_at_read = 0;
+static const bool alloc_bufs_at_read = 0;
 static const int n_dma_bufs = 3;  /* Used by S/G_PARM */
 #endif /* MCAM_MODE_VMALLOC */
 
-static int flip;
+static bool flip;
 module_param(flip, bool, 0444);
 MODULE_PARM_DESC(flip,
 		"If set, the sensor will be instructed to flip the image "
diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c
index d0f538857285..d7cd0f633f63 100644
--- a/drivers/media/video/msp3400-driver.c
+++ b/drivers/media/video/msp3400-driver.c
@@ -69,12 +69,12 @@ MODULE_LICENSE("GPL");
 /* module parameters */
 static int opmode   = OPMODE_AUTO;
 int msp_debug;		 /* msp_debug output */
-int msp_once;		 /* no continuous stereo monitoring */
-int msp_amsound;	 /* hard-wire AM sound at 6.5 Hz (france),
+bool msp_once;		 /* no continuous stereo monitoring */
+bool msp_amsound;	 /* hard-wire AM sound at 6.5 Hz (france),
 			    the autoscan seems work well only with FM... */
 int msp_standard = 1;    /* Override auto detect of audio msp_standard,
 			    if needed. */
-int msp_dolby;
+bool msp_dolby;
 
 int msp_stereo_thresh = 0x190; /* a2 threshold for stereo/bilingual
 					(msp34xxg only) 0x00a0-0x03c0 */
diff --git a/drivers/media/video/msp3400-driver.h b/drivers/media/video/msp3400-driver.h
index 831e8db4368c..fbe5e0715f93 100644
--- a/drivers/media/video/msp3400-driver.h
+++ b/drivers/media/video/msp3400-driver.h
@@ -44,10 +44,10 @@
 
 /* module parameters */
 extern int msp_debug;
-extern int msp_once;
-extern int msp_amsound;
+extern bool msp_once;
+extern bool msp_amsound;
 extern int msp_standard;
-extern int msp_dolby;
+extern bool msp_dolby;
 extern int msp_stereo_thresh;
 
 struct msp_state {
diff --git a/drivers/media/video/omap/omap_vout.c b/drivers/media/video/omap/omap_vout.c
index ee0d0b39cd17..0de598bf66bb 100644
--- a/drivers/media/video/omap/omap_vout.c
+++ b/drivers/media/video/omap/omap_vout.c
@@ -70,9 +70,9 @@ static u32 video1_numbuffers = 3;
 static u32 video2_numbuffers = 3;
 static u32 video1_bufsize = OMAP_VOUT_MAX_BUF_SIZE;
 static u32 video2_bufsize = OMAP_VOUT_MAX_BUF_SIZE;
-static u32 vid1_static_vrfb_alloc;
-static u32 vid2_static_vrfb_alloc;
-static int debug;
+static bool vid1_static_vrfb_alloc;
+static bool vid2_static_vrfb_alloc;
+static bool debug;
 
 /* Module parameters */
 module_param(video1_numbuffers, uint, S_IRUGO);
diff --git a/drivers/media/video/omap/omap_vout_vrfb.c b/drivers/media/video/omap/omap_vout_vrfb.c
index ebebcac49225..4be26abf6cea 100644
--- a/drivers/media/video/omap/omap_vout_vrfb.c
+++ b/drivers/media/video/omap/omap_vout_vrfb.c
@@ -84,7 +84,7 @@ void omap_vout_free_vrfb_buffers(struct omap_vout_device *vout)
 }
 
 int omap_vout_setup_vrfb_bufs(struct platform_device *pdev, int vid_num,
-			u32 static_vrfb_allocation)
+			      bool static_vrfb_allocation)
 {
 	int ret = 0, i, j;
 	struct omap_vout_device *vout;
diff --git a/drivers/media/video/ov7670.c b/drivers/media/video/ov7670.c
index 8aa058531280..6a564964853a 100644
--- a/drivers/media/video/ov7670.c
+++ b/drivers/media/video/ov7670.c
@@ -25,7 +25,7 @@ MODULE_AUTHOR("Jonathan Corbet <corbet@lwn.net>");
 MODULE_DESCRIPTION("A low-level driver for OmniVision ov7670 sensors");
 MODULE_LICENSE("GPL");
 
-static int debug;
+static bool debug;
 module_param(debug, bool, 0644);
 MODULE_PARM_DESC(debug, "Debug level (0-1)");
 
diff --git a/drivers/media/video/saa7115.c b/drivers/media/video/saa7115.c
index 5cfdbc78b918..0ef5484696b6 100644
--- a/drivers/media/video/saa7115.c
+++ b/drivers/media/video/saa7115.c
@@ -57,7 +57,7 @@ MODULE_AUTHOR(  "Maxim Yevtyushkin, Kevin Thayer, Chris Kennedy, "
 		"Hans Verkuil, Mauro Carvalho Chehab");
 MODULE_LICENSE("GPL");
 
-static int debug;
+static bool debug;
 module_param(debug, bool, 0644);
 
 MODULE_PARM_DESC(debug, "Debug level (0-1)");
diff --git a/drivers/media/video/stk-webcam.c b/drivers/media/video/stk-webcam.c
index b7fb5a5cad7e..3c61aec517ac 100644
--- a/drivers/media/video/stk-webcam.c
+++ b/drivers/media/video/stk-webcam.c
@@ -38,11 +38,11 @@
 #include "stk-webcam.h"
 
 
-static int hflip = 1;
+static bool hflip = 1;
 module_param(hflip, bool, 0444);
 MODULE_PARM_DESC(hflip, "Horizontal image flip (mirror). Defaults to 1");
 
-static int vflip = 1;
+static bool vflip = 1;
 module_param(vflip, bool, 0444);
 MODULE_PARM_DESC(vflip, "Vertical image flip. Defaults to 1");
 
diff --git a/drivers/media/video/tm6000/tm6000-alsa.c b/drivers/media/video/tm6000/tm6000-alsa.c
index 7d675c72fd47..bb2047c10358 100644
--- a/drivers/media/video/tm6000/tm6000-alsa.c
+++ b/drivers/media/video/tm6000/tm6000-alsa.c
@@ -42,7 +42,7 @@
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;	/* Index 0-MAX */
 
-static int enable[SNDRV_CARDS] = {1, [1 ... (SNDRV_CARDS - 1)] = 1};
+static bool enable[SNDRV_CARDS] = {1, [1 ... (SNDRV_CARDS - 1)] = 1};
 
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable tm6000x soundcard. default enabled.");
diff --git a/drivers/media/video/tvp514x.c b/drivers/media/video/tvp514x.c
index 926f03931156..dd26cacd0556 100644
--- a/drivers/media/video/tvp514x.c
+++ b/drivers/media/video/tvp514x.c
@@ -52,7 +52,7 @@
 #define LOCK_RETRY_DELAY                (200)
 
 /* Debug functions */
-static int debug;
+static bool debug;
 module_param(debug, bool, 0644);
 MODULE_PARM_DESC(debug, "Debug level (0-1)");
 
diff --git a/drivers/media/video/tvp7002.c b/drivers/media/video/tvp7002.c
index 7875e80cb2ff..236c559d5f51 100644
--- a/drivers/media/video/tvp7002.c
+++ b/drivers/media/video/tvp7002.c
@@ -63,7 +63,7 @@ MODULE_LICENSE("GPL");
 #define TVP7002_CL_MASK		0x0f
 
 /* Debug functions */
-static int debug;
+static bool debug;
 module_param(debug, bool, 0644);
 MODULE_PARM_DESC(debug, "Debug level (0-2)");
 
diff --git a/drivers/media/video/upd64083.c b/drivers/media/video/upd64083.c
index 9bbe61700fd5..65d065aa6091 100644
--- a/drivers/media/video/upd64083.c
+++ b/drivers/media/video/upd64083.c
@@ -34,7 +34,7 @@ MODULE_DESCRIPTION("uPD64083 driver");
 MODULE_AUTHOR("T. Adachi, Takeru KOMORIYA, Hans Verkuil");
 MODULE_LICENSE("GPL");
 
-static int debug;
+static bool debug;
 module_param(debug, bool, 0644);
 
 MODULE_PARM_DESC(debug, "Debug level (0-1)");
diff --git a/drivers/media/video/via-camera.c b/drivers/media/video/via-camera.c
index cbf13d09b4ac..bfae41ba53c3 100644
--- a/drivers/media/video/via-camera.c
+++ b/drivers/media/video/via-camera.c
@@ -34,13 +34,13 @@ MODULE_AUTHOR("Jonathan Corbet <corbet@lwn.net>");
 MODULE_DESCRIPTION("VIA framebuffer-based camera controller driver");
 MODULE_LICENSE("GPL");
 
-static int flip_image;
+static bool flip_image;
 module_param(flip_image, bool, 0444);
 MODULE_PARM_DESC(flip_image,
 		"If set, the sensor will be instructed to flip the image "
 		"vertically.");
 
-static int override_serial;
+static bool override_serial;
 module_param(override_serial, bool, 0444);
 MODULE_PARM_DESC(override_serial,
 		"The camera driver will normally refuse to load if "
diff --git a/drivers/media/video/zoran/zoran_device.c b/drivers/media/video/zoran/zoran_device.c
index e8a27844bf39..e86173bd1327 100644
--- a/drivers/media/video/zoran/zoran_device.c
+++ b/drivers/media/video/zoran/zoran_device.c
@@ -57,7 +57,7 @@
 		   ZR36057_ISR_GIRQ1 | \
 		   ZR36057_ISR_JPEGRepIRQ )
 
-static int lml33dpath;		/* default = 0
+static bool lml33dpath;		/* default = 0
 				 * 1 will use digital path in capture
 				 * mode instead of analog. It can be
 				 * used for picture adjustments using
diff --git a/drivers/media/video/zoran/zr36060.c b/drivers/media/video/zoran/zr36060.c
index 5e4f57cbf314..f08546fe2234 100644
--- a/drivers/media/video/zoran/zr36060.c
+++ b/drivers/media/video/zoran/zr36060.c
@@ -50,7 +50,7 @@
 /* amount of chips attached via this driver */
 static int zr36060_codecs;
 
-static int low_bitrate;
+static bool low_bitrate;
 module_param(low_bitrate, bool, 0);
 MODULE_PARM_DESC(low_bitrate, "Buz compatibility option, halves bitrate");
 
diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c
index 6ce70e9615d3..5319e9b65847 100644
--- a/drivers/memstick/host/jmb38x_ms.c
+++ b/drivers/memstick/host/jmb38x_ms.c
@@ -21,7 +21,7 @@
 
 #define DRIVER_NAME "jmb38x_ms"
 
-static int no_dma;
+static bool no_dma;
 module_param(no_dma, bool, 0644);
 
 enum {
diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c
index 668f5c6a0399..29b2172ae18f 100644
--- a/drivers/memstick/host/r592.c
+++ b/drivers/memstick/host/r592.c
@@ -23,7 +23,7 @@
 #include <linux/swab.h>
 #include "r592.h"
 
-static int r592_enable_dma = 1;
+static bool r592_enable_dma = 1;
 static int debug;
 
 static const char *tpc_names[] = {
diff --git a/drivers/memstick/host/tifm_ms.c b/drivers/memstick/host/tifm_ms.c
index b7aacf47703a..6902b83eb1b4 100644
--- a/drivers/memstick/host/tifm_ms.c
+++ b/drivers/memstick/host/tifm_ms.c
@@ -22,7 +22,7 @@
 
 #define DRIVER_NAME "tifm_ms"
 
-static int no_dma;
+static bool no_dma;
 module_param(no_dma, bool, 0644);
 
 /*
diff --git a/drivers/misc/iwmc3200top/main.c b/drivers/misc/iwmc3200top/main.c
index b1f4563be9ae..701eb600b127 100644
--- a/drivers/misc/iwmc3200top/main.c
+++ b/drivers/misc/iwmc3200top/main.c
@@ -376,20 +376,20 @@ static int blocks;
 module_param(blocks, int, 0604);
 MODULE_PARM_DESC(blocks, "max_blocks_to_send");
 
-static int dump;
+static bool dump;
 module_param(dump, bool, 0604);
 MODULE_PARM_DESC(dump, "dump_hex_content");
 
-static int jump = 1;
+static bool jump = 1;
 module_param(jump, bool, 0604);
 
-static int direct = 1;
+static bool direct = 1;
 module_param(direct, bool, 0604);
 
-static int checksum = 1;
+static bool checksum = 1;
 module_param(checksum, bool, 0604);
 
-static int fw_download = 1;
+static bool fw_download = 1;
 module_param(fw_download, bool, 0604);
 
 static int block_size = IWMC_SDIO_BLK_SIZE;
@@ -398,7 +398,7 @@ module_param(block_size, int, 0404);
 static int download_trans_blks = IWMC_DEFAULT_TR_BLK;
 module_param(download_trans_blks, int, 0604);
 
-static int rubbish_barker;
+static bool rubbish_barker;
 module_param(rubbish_barker, bool, 0604);
 
 #ifdef CONFIG_IWMC3200TOP_DEBUG
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 950b97d7412a..75d7d7e17366 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -48,7 +48,7 @@ static struct workqueue_struct *workqueue;
  * performance cost, and for other reasons may not always be desired.
  * So we allow it it to be disabled.
  */
-int use_spi_crc = 1;
+bool use_spi_crc = 1;
 module_param(use_spi_crc, bool, 0);
 
 /*
@@ -58,9 +58,9 @@ module_param(use_spi_crc, bool, 0);
  * overridden if necessary.
  */
 #ifdef CONFIG_MMC_UNSAFE_RESUME
-int mmc_assume_removable;
+bool mmc_assume_removable;
 #else
-int mmc_assume_removable = 1;
+bool mmc_assume_removable = 1;
 #endif
 EXPORT_SYMBOL(mmc_assume_removable);
 module_param_named(removable, mmc_assume_removable, bool, 0644);
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 14664f1fb16f..afa6bd2b7b70 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -64,7 +64,7 @@ int mmc_attach_sd(struct mmc_host *host);
 int mmc_attach_sdio(struct mmc_host *host);
 
 /* Module parameters */
-extern int use_spi_crc;
+extern bool use_spi_crc;
 
 /* Debugfs information for hosts and cards */
 void mmc_add_host_debugfs(struct mmc_host *host);
diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c
index f70d04664cac..69d249f51d6a 100644
--- a/drivers/mmc/host/tifm_sd.c
+++ b/drivers/mmc/host/tifm_sd.c
@@ -22,8 +22,8 @@
 #define DRIVER_NAME "tifm_sd"
 #define DRIVER_VERSION "0.8"
 
-static int no_dma = 0;
-static int fixed_timeout = 0;
+static bool no_dma = 0;
+static bool fixed_timeout = 0;
 module_param(no_dma, bool, 0644);
 module_param(fixed_timeout, bool, 0644);
 
diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c
index 2ec978bc32ba..3135a1a5d75d 100644
--- a/drivers/mmc/host/vub300.c
+++ b/drivers/mmc/host/vub300.c
@@ -223,25 +223,25 @@ enum SD_RESPONSE_TYPE {
 #define FUN(c) (0x000007 & (c->arg>>28))
 #define REG(c) (0x01FFFF & (c->arg>>9))
 
-static int limit_speed_to_24_MHz;
+static bool limit_speed_to_24_MHz;
 module_param(limit_speed_to_24_MHz, bool, 0644);
 MODULE_PARM_DESC(limit_speed_to_24_MHz, "Limit Max SDIO Clock Speed to 24 MHz");
 
-static int pad_input_to_usb_pkt;
+static bool pad_input_to_usb_pkt;
 module_param(pad_input_to_usb_pkt, bool, 0644);
 MODULE_PARM_DESC(pad_input_to_usb_pkt,
 		 "Pad USB data input transfers to whole USB Packet");
 
-static int disable_offload_processing;
+static bool disable_offload_processing;
 module_param(disable_offload_processing, bool, 0644);
 MODULE_PARM_DESC(disable_offload_processing, "Disable Offload Processing");
 
-static int force_1_bit_data_xfers;
+static bool force_1_bit_data_xfers;
 module_param(force_1_bit_data_xfers, bool, 0644);
 MODULE_PARM_DESC(force_1_bit_data_xfers,
 		 "Force SDIO Data Transfers to 1-bit Mode");
 
-static int force_polling_for_irqs;
+static bool force_polling_for_irqs;
 module_param(force_polling_for_irqs, bool, 0644);
 MODULE_PARM_DESC(force_polling_for_irqs, "Force Polling for SDIO interrupts");
 
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 8544d6bf50a0..5c3d719c37e6 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -185,7 +185,7 @@ struct pxa3xx_nand_info {
 	uint32_t		ndcb2;
 };
 
-static int use_dma = 1;
+static bool use_dma = 1;
 module_param(use_dma, bool, 0444);
 MODULE_PARM_DESC(use_dma, "enable DMA for data transferring to/from NAND HW");
 
diff --git a/drivers/mtd/nand/r852.c b/drivers/mtd/nand/r852.c
index f20f393bfda6..769a4e096b3c 100644
--- a/drivers/mtd/nand/r852.c
+++ b/drivers/mtd/nand/r852.c
@@ -22,7 +22,7 @@
 #include "r852.h"
 
 
-static int r852_enable_dma = 1;
+static bool r852_enable_dma = 1;
 module_param(r852_enable_dma, bool, S_IRUGO);
 MODULE_PARM_DESC(r852_enable_dma, "Enable usage of the DMA (default)");
 
diff --git a/drivers/parport/parport_ip32.c b/drivers/parport/parport_ip32.c
index 0dc34f12f92e..d4716273651e 100644
--- a/drivers/parport/parport_ip32.c
+++ b/drivers/parport/parport_ip32.c
@@ -135,7 +135,7 @@
 #define PARPORT_IP32_ENABLE_EPP	(1U << 3)
 #define PARPORT_IP32_ENABLE_ECP	(1U << 4)
 static unsigned int features =	~0U;
-static int verbose_probing =	DEFAULT_VERBOSE_PROBING;
+static bool verbose_probing =	DEFAULT_VERBOSE_PROBING;
 
 /* We do not support more than one port. */
 static struct parport *this_port = NULL;
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index 095f29e13734..2a47e82821da 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -44,7 +44,7 @@
 #define	METHOD_NAME__SUN	"_SUN"
 #define	METHOD_NAME_OSHP	"OSHP"
 
-static int debug_acpi;
+static bool debug_acpi;
 
 static acpi_status
 decode_type0_hpx_record(union acpi_object *record, struct hotplug_params *hpx)
diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index efa9f2de51c1..aa41631e9e02 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -47,7 +47,7 @@
 /* name size which is used for entries in pcihpfs */
 #define SLOT_NAME_SIZE  21              /* {_SUN} */
 
-static int debug;
+static bool debug;
 int acpiphp_debug;
 
 /* local variables */
diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c
index e525263210ee..c35e8ad6db01 100644
--- a/drivers/pci/hotplug/acpiphp_ibm.c
+++ b/drivers/pci/hotplug/acpiphp_ibm.c
@@ -43,7 +43,7 @@
 #define DRIVER_AUTHOR	"Irene Zubarev <zubarev@us.ibm.com>, Vernon Mauery <vernux@us.ibm.com>"
 #define DRIVER_DESC	"ACPI Hot Plug PCI Controller Driver IBM extension"
 
-static int debug;
+static bool debug;
 
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/pci/hotplug/cpcihp_zt5550.c b/drivers/pci/hotplug/cpcihp_zt5550.c
index 41f6a8d79c81..6bf8d2ab164f 100644
--- a/drivers/pci/hotplug/cpcihp_zt5550.c
+++ b/drivers/pci/hotplug/cpcihp_zt5550.c
@@ -57,8 +57,8 @@
 #define warn(format, arg...) printk(KERN_WARNING "%s: " format "\n", MY_NAME , ## arg)
 
 /* local variables */
-static int debug;
-static int poll;
+static bool debug;
+static bool poll;
 static struct cpci_hp_controller_ops zt5550_hpc_ops;
 static struct cpci_hp_controller zt5550_hpc;
 
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index f1ce99cceac6..187a199da93c 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -57,8 +57,8 @@ struct irq_routing_table *cpqhp_routing_table;
 static void __iomem *smbios_table;
 static void __iomem *smbios_start;
 static void __iomem *cpqhp_rom_start;
-static int power_mode;
-static int debug;
+static bool power_mode;
+static bool debug;
 static int initialized;
 
 #define DRIVER_VERSION	"0.9.8"
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index d934dd4fa873..5506e0e8fbc0 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -49,7 +49,7 @@
 
 int ibmphp_debug;
 
-static int debug;
+static bool debug;
 module_param(debug, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC (debug, "Debugging mode enabled or not");
 MODULE_LICENSE ("GPL");
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index 6d2eea93298f..202f4a969eb5 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -51,7 +51,7 @@
 
 
 /* local variables */
-static int debug;
+static bool debug;
 
 #define DRIVER_VERSION	"0.5"
 #define DRIVER_AUTHOR	"Greg Kroah-Hartman <greg@kroah.com>, Scott Murray <scottm@somanetworks.com>"
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 9a33fdde2d16..4b7cce1de6ec 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -40,10 +40,10 @@
 
 #define MY_NAME	"pciehp"
 
-extern int pciehp_poll_mode;
+extern bool pciehp_poll_mode;
 extern int pciehp_poll_time;
-extern int pciehp_debug;
-extern int pciehp_force;
+extern bool pciehp_debug;
+extern bool pciehp_force;
 extern struct workqueue_struct *pciehp_wq;
 
 #define dbg(format, arg...)						\
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index b8c99d35ac97..365c6b96c642 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -38,10 +38,10 @@
 #include <linux/time.h>
 
 /* Global variables */
-int pciehp_debug;
-int pciehp_poll_mode;
+bool pciehp_debug;
+bool pciehp_poll_mode;
 int pciehp_poll_time;
-int pciehp_force;
+bool pciehp_force;
 struct workqueue_struct *pciehp_wq;
 
 #define DRIVER_VERSION	"0.4"
diff --git a/drivers/pci/hotplug/pcihp_skeleton.c b/drivers/pci/hotplug/pcihp_skeleton.c
index 5175d9b26f0b..b20ceaaa31f4 100644
--- a/drivers/pci/hotplug/pcihp_skeleton.c
+++ b/drivers/pci/hotplug/pcihp_skeleton.c
@@ -59,7 +59,7 @@ static LIST_HEAD(slot_list);
 #define warn(format, arg...) printk(KERN_WARNING "%s: " format "\n", MY_NAME , ## arg)
 
 /* local variables */
-static int debug;
+static bool debug;
 static int num_slots;
 
 #define DRIVER_VERSION	"0.3"
diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
index 419919a87b0f..df5677440a08 100644
--- a/drivers/pci/hotplug/rpaphp.h
+++ b/drivers/pci/hotplug/rpaphp.h
@@ -46,7 +46,7 @@
 #define PRESENT         1	/* Card in slot */
 
 #define MY_NAME "rpaphp"
-extern int rpaphp_debug;
+extern bool rpaphp_debug;
 #define dbg(format, arg...)					\
 	do {							\
 		if (rpaphp_debug)					\
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 758adb5f47fd..127d6e600185 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -37,7 +37,7 @@
 				/* and pci_do_scan_bus */
 #include "rpaphp.h"
 
-int rpaphp_debug;
+bool rpaphp_debug;
 LIST_HEAD(rpaphp_slot_head);
 
 #define DRIVER_VERSION	"0.1"
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h
index e0c90e643b5f..ca64932e658b 100644
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h
@@ -43,9 +43,9 @@
 	#define MY_NAME	THIS_MODULE->name
 #endif
 
-extern int shpchp_poll_mode;
+extern bool shpchp_poll_mode;
 extern int shpchp_poll_time;
-extern int shpchp_debug;
+extern bool shpchp_debug;
 extern struct workqueue_struct *shpchp_wq;
 extern struct workqueue_struct *shpchp_ordered_wq;
 
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index dd7e0c51a33e..7414fd9ad1d2 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -36,8 +36,8 @@
 #include "shpchp.h"
 
 /* Global variables */
-int shpchp_debug;
-int shpchp_poll_mode;
+bool shpchp_debug;
+bool shpchp_poll_mode;
 int shpchp_poll_time;
 struct workqueue_struct *shpchp_wq;
 struct workqueue_struct *shpchp_ordered_wq;
diff --git a/drivers/pci/pcie/aer/aer_inject.c b/drivers/pci/pcie/aer/aer_inject.c
index 95489cd9a555..52229863e9fe 100644
--- a/drivers/pci/pcie/aer/aer_inject.c
+++ b/drivers/pci/pcie/aer/aer_inject.c
@@ -28,7 +28,7 @@
 #include "aerdrv.h"
 
 /* Override the existing corrected and uncorrected error masks */
-static int aer_mask_override;
+static bool aer_mask_override;
 module_param(aer_mask_override, bool, 0);
 
 struct aer_error_inj {
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 9674e9f30d49..0ca053538146 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -27,8 +27,8 @@
 #include <linux/kfifo.h>
 #include "aerdrv.h"
 
-static int forceload;
-static int nosourceid;
+static bool forceload;
+static bool nosourceid;
 module_param(forceload, bool, 0);
 module_param(nosourceid, bool, 0);
 
diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c
index 9dc565c615bd..849c0c11d2af 100644
--- a/drivers/pcmcia/yenta_socket.c
+++ b/drivers/pcmcia/yenta_socket.c
@@ -24,15 +24,15 @@
 #include "yenta_socket.h"
 #include "i82365.h"
 
-static int disable_clkrun;
+static bool disable_clkrun;
 module_param(disable_clkrun, bool, 0444);
 MODULE_PARM_DESC(disable_clkrun, "If PC card doesn't function properly, please try this option");
 
-static int isa_probe = 1;
+static bool isa_probe = 1;
 module_param(isa_probe, bool, 0444);
 MODULE_PARM_DESC(isa_probe, "If set ISA interrupts are probed (default). Set to N to disable probing");
 
-static int pwr_irqs_off;
+static bool pwr_irqs_off;
 module_param(pwr_irqs_off, bool, 0644);
 MODULE_PARM_DESC(pwr_irqs_off, "Force IRQs off during power-on of slot. Use only when seeing IRQ storms!");
 
diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c
index 8877b836d27c..d96734478324 100644
--- a/drivers/platform/x86/compal-laptop.c
+++ b/drivers/platform/x86/compal-laptop.c
@@ -189,7 +189,7 @@ struct compal_data{
 /* =============== */
 /* General globals */
 /* =============== */
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Force driver load, ignore DMI data");
 
diff --git a/drivers/platform/x86/intel_oaktrail.c b/drivers/platform/x86/intel_oaktrail.c
index 7f88c7923fc6..6ee0b5c90933 100644
--- a/drivers/platform/x86/intel_oaktrail.c
+++ b/drivers/platform/x86/intel_oaktrail.c
@@ -95,7 +95,7 @@
 #define OT_EC_BL_CONTROL_ON_DATA	0x1A
 
 
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Force driver load, ignore DMI data");
 
diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c
index f204643c5052..bb5132128b33 100644
--- a/drivers/platform/x86/msi-laptop.c
+++ b/drivers/platform/x86/msi-laptop.c
@@ -89,7 +89,7 @@ static int msi_laptop_resume(struct platform_device *device);
 
 #define MSI_STANDARD_EC_DEVICES_EXISTS_ADDRESS	0x2f
 
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force, "Force driver load, ignore DMI data");
 
diff --git a/drivers/platform/x86/samsung-laptop.c b/drivers/platform/x86/samsung-laptop.c
index 09e26bfd4643..fd73ea89b857 100644
--- a/drivers/platform/x86/samsung-laptop.c
+++ b/drivers/platform/x86/samsung-laptop.c
@@ -228,12 +228,12 @@ static struct platform_device *sdev;
 static struct rfkill *rfk;
 static bool has_stepping_quirk;
 
-static int force;
+static bool force;
 module_param(force, bool, 0);
 MODULE_PARM_DESC(force,
 		"Disable the DMI check and forces the driver to be loaded");
 
-static int debug;
+static bool debug;
 module_param(debug, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(debug, "Debug enabled or not");
 
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 62533c105da4..ea0c6075b720 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -378,13 +378,13 @@ static unsigned int bright_maxlvl;	/* 0 = unknown */
 
 #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES
 static int dbg_wlswemul;
-static int tpacpi_wlsw_emulstate;
+static bool tpacpi_wlsw_emulstate;
 static int dbg_bluetoothemul;
-static int tpacpi_bluetooth_emulstate;
+static bool tpacpi_bluetooth_emulstate;
 static int dbg_wwanemul;
-static int tpacpi_wwan_emulstate;
+static bool tpacpi_wwan_emulstate;
 static int dbg_uwbemul;
-static int tpacpi_uwb_emulstate;
+static bool tpacpi_uwb_emulstate;
 #endif
 
 
@@ -6444,7 +6444,7 @@ static struct ibm_struct brightness_driver_data = {
 
 static int alsa_index = ~((1 << (SNDRV_CARDS - 3)) - 1); /* last three slots */
 static char *alsa_id = "ThinkPadEC";
-static int alsa_enable = SNDRV_DEFAULT_ENABLE1;
+static bool alsa_enable = SNDRV_DEFAULT_ENABLE1;
 
 struct tpacpi_alsa_data {
 	struct snd_card *card;
@@ -6487,7 +6487,7 @@ static enum tpacpi_volume_access_mode volume_mode =
 	TPACPI_VOL_MODE_MAX;
 
 static enum tpacpi_volume_capabilities volume_capabilities;
-static int volume_control_allowed;
+static bool volume_control_allowed;
 
 /*
  * Used to syncronize writers to TP_EC_AUDIO and
@@ -7265,7 +7265,7 @@ enum fan_control_commands {
 						 * and also watchdog cmd */
 };
 
-static int fan_control_allowed;
+static bool fan_control_allowed;
 
 static enum fan_status_access_mode fan_status_access_mode;
 static enum fan_control_access_mode fan_control_access_mode;
@@ -8437,7 +8437,7 @@ static struct proc_dir_entry *proc_dir;
  * Module and infrastructure proble, init and exit handling
  */
 
-static int force_load;
+static bool force_load;
 
 #ifdef CONFIG_THINKPAD_ACPI_DEBUG
 static const char * __init str_supported(int is_supported)
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index a134c26870b0..42a4dcc25f92 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -82,12 +82,12 @@ struct wmi_block {
 #define ACPI_WMI_STRING      0x4	/* GUID takes & returns a string */
 #define ACPI_WMI_EVENT       0x8	/* GUID is an event */
 
-static int debug_event;
+static bool debug_event;
 module_param(debug_event, bool, 0444);
 MODULE_PARM_DESC(debug_event,
 		 "Log WMI Events [0/1]");
 
-static int debug_dump_wdg;
+static bool debug_dump_wdg;
 module_param(debug_dump_wdg, bool, 0444);
 MODULE_PARM_DESC(debug_dump_wdg,
 		 "Dump available WMI interfaces [0/1]");
diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c
index 545874b1df9e..076e211a40b7 100644
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -64,7 +64,7 @@ static unsigned int cache_time = 1000;
 module_param(cache_time, uint, 0644);
 MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
 
-static unsigned int pmod_enabled;
+static bool pmod_enabled;
 module_param(pmod_enabled, bool, 0644);
 MODULE_PARM_DESC(pmod_enabled, "PMOD enable bit");
 
diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c
index e5cb9248a442..f3b8bb84faf2 100644
--- a/drivers/s390/char/raw3270.c
+++ b/drivers/s390/char/raw3270.c
@@ -75,7 +75,7 @@ static LIST_HEAD(raw3270_devices);
 static int raw3270_registered;
 
 /* Module parameters */
-static int tubxcorrect = 0;
+static bool tubxcorrect = 0;
 module_param(tubxcorrect, bool, 0);
 
 /*
diff --git a/drivers/s390/char/vmwatchdog.c b/drivers/s390/char/vmwatchdog.c
index 11312f401c70..2211277a1079 100644
--- a/drivers/s390/char/vmwatchdog.c
+++ b/drivers/s390/char/vmwatchdog.c
@@ -28,9 +28,9 @@
 #define MAX_CMDLEN 240
 #define MIN_INTERVAL 15
 static char vmwdt_cmd[MAX_CMDLEN] = "IPL";
-static int vmwdt_conceal;
+static bool vmwdt_conceal;
 
-static int vmwdt_nowayout = WATCHDOG_NOWAYOUT;
+static bool vmwdt_nowayout = WATCHDOG_NOWAYOUT;
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c
index 195823a51aab..ed119cedaae0 100644
--- a/drivers/scsi/aha1542.c
+++ b/drivers/scsi/aha1542.c
@@ -102,7 +102,7 @@ static int setup_dmaspeed[MAXBOARDS] __initdata = { -1, -1, -1, -1 };
  */
 
 #if defined(MODULE)
-static int isapnp = 0;
+static bool isapnp = 0;
 static int aha1542[] = {0x330, 11, 4, -1};
 module_param_array(aha1542, int, NULL, 0);
 module_param(isapnp, bool, 0);
diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c
index f5b718d3c31b..13aeca3d51f2 100644
--- a/drivers/scsi/dc395x.c
+++ b/drivers/scsi/dc395x.c
@@ -546,7 +546,7 @@ static struct ParameterData __devinitdata cfg_data[] = {
  * command line overrides will be used. If set to 1 then safe and
  * slow settings will be used.
  */
-static int use_safe_settings = 0;
+static bool use_safe_settings = 0;
 module_param_named(safe, use_safe_settings, bool, 0);
 MODULE_PARM_DESC(safe, "Use safe and slow settings only. Default: false");
 
diff --git a/drivers/scsi/nsp32.c b/drivers/scsi/nsp32.c
index f6a50c98c36f..002924963cd8 100644
--- a/drivers/scsi/nsp32.c
+++ b/drivers/scsi/nsp32.c
@@ -59,11 +59,11 @@ MODULE_PARM_DESC(trans_mode, "transfer mode (0: BIOS(default) 1: Async 2: Ultra2
 #define ASYNC_MODE    1
 #define ULTRA20M_MODE 2
 
-static int       auto_param = 0;	/* default: ON */
+static bool      auto_param = 0;	/* default: ON */
 module_param     (auto_param, bool, 0);
 MODULE_PARM_DESC(auto_param, "AutoParameter mode (0: ON(default) 1: OFF)");
 
-static int       disc_priv  = 1;	/* default: OFF */
+static bool      disc_priv  = 1;	/* default: OFF */
 module_param     (disc_priv, bool, 0);
 MODULE_PARM_DESC(disc_priv,  "disconnection privilege mode (0: ON 1: OFF(default))");
 
diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index ca86721a71b9..b61a753eb896 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -70,7 +70,7 @@ module_param(nsp_burst_mode, int, 0);
 MODULE_PARM_DESC(nsp_burst_mode, "Burst transfer mode (0=io8, 1=io32, 2=mem32(default))");
 
 /* Release IO ports after configuration? */
-static int       free_ports = 0;
+static bool       free_ports = 0;
 module_param(free_ports, bool, 0);
 MODULE_PARM_DESC(free_ports, "Release IO ports after configuration? (default: 0 (=no))");
 
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index 0d18d80bcd25..9bcf87ae4c00 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -61,7 +61,7 @@ EXPORT_SYMBOL(comedi_debug);
 module_param(comedi_debug, int, 0644);
 #endif
 
-int comedi_autoconfig = 1;
+bool comedi_autoconfig = 1;
 module_param(comedi_autoconfig, bool, 0444);
 
 static int comedi_num_legacy_minors;
diff --git a/drivers/staging/comedi/comedi_fops.h b/drivers/staging/comedi/comedi_fops.h
index da4b4f5553f5..006cf14c577a 100644
--- a/drivers/staging/comedi/comedi_fops.h
+++ b/drivers/staging/comedi/comedi_fops.h
@@ -1,10 +1,11 @@
 
 #ifndef _COMEDI_FOPS_H
 #define _COMEDI_FOPS_H
+#include <linux/types.h>
 
 extern struct class *comedi_class;
 extern const struct file_operations comedi_fops;
-extern int comedi_autoconfig;
+extern bool comedi_autoconfig;
 extern struct comedi_driver *comedi_drivers;
 
 #endif /* _COMEDI_FOPS_H */
diff --git a/drivers/staging/media/go7007/snd-go7007.c b/drivers/staging/media/go7007/snd-go7007.c
index deac938d8505..d071c838ac2a 100644
--- a/drivers/staging/media/go7007/snd-go7007.c
+++ b/drivers/staging/media/go7007/snd-go7007.c
@@ -38,7 +38,7 @@
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
-static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
+static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
 
 module_param_array(index, int, NULL, 0444);
 module_param_array(id, charp, NULL, 0444);
diff --git a/drivers/staging/media/lirc/lirc_bt829.c b/drivers/staging/media/lirc/lirc_bt829.c
index c5a0d27a02dc..4d20e9f74118 100644
--- a/drivers/staging/media/lirc/lirc_bt829.c
+++ b/drivers/staging/media/lirc/lirc_bt829.c
@@ -53,7 +53,7 @@ static unsigned char do_get_bits(void);
 
 #define DRIVER_NAME "lirc_bt829"
 
-static int debug;
+static bool debug;
 #define dprintk(fmt, args...)						 \
 	do {								 \
 		if (debug)						 \
diff --git a/drivers/staging/media/lirc/lirc_igorplugusb.c b/drivers/staging/media/lirc/lirc_igorplugusb.c
index 6cd4cd67a1dd..7a2501776679 100644
--- a/drivers/staging/media/lirc/lirc_igorplugusb.c
+++ b/drivers/staging/media/lirc/lirc_igorplugusb.c
@@ -62,9 +62,9 @@
 
 /* debugging support */
 #ifdef CONFIG_USB_DEBUG
-static int debug = 1;
+static bool debug = 1;
 #else
-static int debug;
+static bool debug;
 #endif
 
 #define dprintk(fmt, args...)					\
diff --git a/drivers/staging/media/lirc/lirc_parallel.c b/drivers/staging/media/lirc/lirc_parallel.c
index 02b07a6c1771..dd2bca7b56fa 100644
--- a/drivers/staging/media/lirc/lirc_parallel.c
+++ b/drivers/staging/media/lirc/lirc_parallel.c
@@ -63,8 +63,8 @@
 
 /*** Global Variables ***/
 
-static int debug;
-static int check_pselecd;
+static bool debug;
+static bool check_pselecd;
 
 unsigned int irq = LIRC_IRQ;
 unsigned int io = LIRC_PORT;
diff --git a/drivers/staging/media/lirc/lirc_serial.c b/drivers/staging/media/lirc/lirc_serial.c
index 8a060a8a7224..2aac67c98283 100644
--- a/drivers/staging/media/lirc/lirc_serial.c
+++ b/drivers/staging/media/lirc/lirc_serial.c
@@ -107,13 +107,13 @@ struct lirc_serial {
 static int type;
 static int io;
 static int irq;
-static int iommap;
+static bool iommap;
 static int ioshift;
-static int softcarrier = 1;
-static int share_irq;
-static int debug;
+static bool softcarrier = 1;
+static bool share_irq;
+static bool debug;
 static int sense = -1;	/* -1 = auto, 0 = active high, 1 = active low */
-static int txsense;	/* 0 = active high, 1 = active low */
+static bool txsense;	/* 0 = active high, 1 = active low */
 
 #define dprintk(fmt, args...)					\
 	do {							\
diff --git a/drivers/staging/media/lirc/lirc_sir.c b/drivers/staging/media/lirc/lirc_sir.c
index 6903d3992eca..c94382b917ac 100644
--- a/drivers/staging/media/lirc/lirc_sir.c
+++ b/drivers/staging/media/lirc/lirc_sir.c
@@ -173,7 +173,7 @@ static DEFINE_SPINLOCK(hardware_lock);
 static int rx_buf[RBUF_LEN];
 static unsigned int rx_tail, rx_head;
 
-static int debug;
+static bool debug;
 #define dprintk(fmt, args...)						\
 	do {								\
 		if (debug)						\
diff --git a/drivers/staging/media/lirc/lirc_zilog.c b/drivers/staging/media/lirc/lirc_zilog.c
index 0302d82a12f7..76ea4a8f2c75 100644
--- a/drivers/staging/media/lirc/lirc_zilog.c
+++ b/drivers/staging/media/lirc/lirc_zilog.c
@@ -155,8 +155,8 @@ static struct mutex tx_data_lock;
 #define zilog_info(s, args...) printk(KERN_INFO KBUILD_MODNAME ": " s, ## args)
 
 /* module parameters */
-static int debug;	/* debug output */
-static int tx_only;	/* only handle the IR Tx function */
+static bool debug;	/* debug output */
+static bool tx_only;	/* only handle the IR Tx function */
 static int minor = -1;	/* minor number */
 
 #define dprintk(fmt, args...)						\
diff --git a/drivers/staging/quatech_usb2/quatech_usb2.c b/drivers/staging/quatech_usb2/quatech_usb2.c
index 02fafecd4773..897a3a99c794 100644
--- a/drivers/staging/quatech_usb2/quatech_usb2.c
+++ b/drivers/staging/quatech_usb2/quatech_usb2.c
@@ -16,7 +16,7 @@
 #include <linux/usb/serial.h>
 #include <linux/uaccess.h>
 
-static int debug;
+static bool debug;
 
 /* Version Information */
 #define DRIVER_VERSION "v2.00"
diff --git a/drivers/staging/serqt_usb2/serqt_usb2.c b/drivers/staging/serqt_usb2/serqt_usb2.c
index c44e41af2880..1c5780f1571b 100644
--- a/drivers/staging/serqt_usb2/serqt_usb2.c
+++ b/drivers/staging/serqt_usb2/serqt_usb2.c
@@ -16,7 +16,7 @@
 #include <linux/usb/serial.h>
 #include <linux/uaccess.h>
 
-static int debug;
+static bool debug;
 
 /* Version Information */
 #define DRIVER_VERSION "v2.14"
diff --git a/drivers/staging/speakup/speakup.h b/drivers/staging/speakup/speakup.h
index 412b87947f66..e66579e6147a 100644
--- a/drivers/staging/speakup/speakup.h
+++ b/drivers/staging/speakup/speakup.h
@@ -116,7 +116,7 @@ extern int bleep_time, bell_pos;
 extern int spell_delay, key_echo;
 extern short punc_mask;
 extern short pitch_shift, synth_flags;
-extern int quiet_boot;
+extern bool quiet_boot;
 extern char *synth_name;
 extern struct bleep unprocessed_sound;
 
diff --git a/drivers/staging/speakup/synth.c b/drivers/staging/speakup/synth.c
index c241074a4b5e..2222d6919ef5 100644
--- a/drivers/staging/speakup/synth.c
+++ b/drivers/staging/speakup/synth.c
@@ -22,7 +22,7 @@ static struct spk_synth *synths[MAXSYNTHS];
 struct spk_synth *synth;
 char pitch_buff[32] = "";
 static int module_status;
-int quiet_boot;
+bool quiet_boot;
 
 struct speakup_info_t speakup_info = {
 	.spinlock = __SPIN_LOCK_UNLOCKED(speakup_info.spinlock),
diff --git a/drivers/staging/vme/bridges/vme_tsi148.c b/drivers/staging/vme/bridges/vme_tsi148.c
index 08a449b4abf9..f50582169b24 100644
--- a/drivers/staging/vme/bridges/vme_tsi148.c
+++ b/drivers/staging/vme/bridges/vme_tsi148.c
@@ -41,7 +41,7 @@ static void __exit tsi148_exit(void);
 
 
 /* Module parameter */
-static int err_chk;
+static bool err_chk;
 static int geoid;
 
 static const char driver_name[] = "vme_tsi148";
diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c
index 6a1241c7f841..de88aa5566e5 100644
--- a/drivers/tty/rocket.c
+++ b/drivers/tty/rocket.c
@@ -118,7 +118,7 @@ static unsigned long board2;
 static unsigned long board3;
 static unsigned long board4;
 static unsigned long controller;
-static int support_low_speed;
+static bool support_low_speed;
 static unsigned long modem1;
 static unsigned long modem2;
 static unsigned long modem3;
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index e67fb20490d2..ff8017f87914 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -850,7 +850,7 @@ static int mgsl_device_count;
  * .text section address and breakpoint on module load.
  * This is useful for use with gdb and add-symbol-file command.
  */
-static int break_on_load;
+static bool break_on_load;
 
 /*
  * Driver major number, defaults to zero to get auto
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index 0f6b796c95c5..a7efe538df00 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -456,7 +456,7 @@ static int synclinkmp_device_count = 0;
  * .text section address and breakpoint on module load.
  * This is useful for use with gdb and add-symbol-file command.
  */
-static int break_on_load = 0;
+static bool break_on_load = 0;
 
 /*
  * Driver major number, defaults to zero to get auto
diff --git a/drivers/usb/atm/speedtch.c b/drivers/usb/atm/speedtch.c
index b42092e1f164..98dd9e49b684 100644
--- a/drivers/usb/atm/speedtch.c
+++ b/drivers/usb/atm/speedtch.c
@@ -73,9 +73,9 @@ static const char speedtch_driver_name[] = "speedtch";
 #define DEFAULT_SW_BUFFERING	0
 
 static unsigned int altsetting = 0; /* zero means: use the default */
-static int dl_512_first = DEFAULT_DL_512_FIRST;
-static int enable_isoc = DEFAULT_ENABLE_ISOC;
-static int sw_buffering = DEFAULT_SW_BUFFERING;
+static bool dl_512_first = DEFAULT_DL_512_FIRST;
+static bool enable_isoc = DEFAULT_ENABLE_ISOC;
+static bool sw_buffering = DEFAULT_SW_BUFFERING;
 
 #define DEFAULT_B_MAX_DSL	8128
 #define DEFAULT_MODEM_MODE	11
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index 00f171a7a8a0..01ea5d7421d4 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -542,7 +542,7 @@ static int modem_index;
 static unsigned int debug;
 static unsigned int altsetting[NB_MODEM] = {
 				[0 ... (NB_MODEM - 1)] = FASTEST_ISO_INTF};
-static int sync_wait[NB_MODEM];
+static bool sync_wait[NB_MODEM];
 static char *cmv_file[NB_MODEM];
 static int annex[NB_MODEM];
 
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 3af5e2dd1d82..8df4b76465ac 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -93,7 +93,7 @@ struct async {
 	u8 bulk_status;
 };
 
-static int usbfs_snoop;
+static bool usbfs_snoop;
 module_param(usbfs_snoop, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(usbfs_snoop, "true to log all usbfs traffic");
 
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 79d339e2e700..a0613d8f9be7 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -102,7 +102,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khubd_wait);
 static struct task_struct *khubd_task;
 
 /* cycle leds on hubs that aren't blinking for attention */
-static int blinkenlights = 0;
+static bool blinkenlights = 0;
 module_param (blinkenlights, bool, S_IRUGO);
 MODULE_PARM_DESC (blinkenlights, "true to cycle leds on hubs");
 
@@ -131,12 +131,12 @@ MODULE_PARM_DESC(initial_descriptor_timeout,
  * otherwise the new scheme is used.  If that fails and "use_both_schemes"
  * is set, then the driver will make another attempt, using the other scheme.
  */
-static int old_scheme_first = 0;
+static bool old_scheme_first = 0;
 module_param(old_scheme_first, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(old_scheme_first,
 		 "start with the old device initialization scheme");
 
-static int use_both_schemes = 1;
+static bool use_both_schemes = 1;
 module_param(use_both_schemes, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(use_both_schemes,
 		"try the other device initialization scheme if the "
@@ -2026,7 +2026,7 @@ static unsigned hub_is_wusb(struct usb_hub *hub)
 #define SET_ADDRESS_TRIES	2
 #define GET_DESCRIPTOR_TRIES	2
 #define SET_CONFIG_TRIES	(2 * (use_both_schemes + 1))
-#define USE_NEW_SCHEME(i)	((i) / 2 == old_scheme_first)
+#define USE_NEW_SCHEME(i)	((i) / 2 == (int)old_scheme_first)
 
 #define HUB_ROOT_RESET_TIME	50	/* times are in msec */
 #define HUB_SHORT_RESET_TIME	10
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 1382c90d0834..8ca9f994a280 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -47,7 +47,7 @@
 
 const char *usbcore_name = "usbcore";
 
-static int nousb;	/* Disable USB when built into kernel image */
+static bool nousb;	/* Disable USB when built into kernel image */
 
 #ifdef	CONFIG_USB_SUSPEND
 static int usb_autosuspend_delay = 2;		/* Default delay value,
diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c
index e9a2c5c44454..c16ff55a74e8 100644
--- a/drivers/usb/gadget/amd5536udc.c
+++ b/drivers/usb/gadget/amd5536udc.c
@@ -152,15 +152,15 @@ static const char *ep_string[] = {
 };
 
 /* DMA usage flag */
-static int use_dma = 1;
+static bool use_dma = 1;
 /* packet per buffer dma */
-static int use_dma_ppb = 1;
+static bool use_dma_ppb = 1;
 /* with per descr. update */
-static int use_dma_ppb_du;
+static bool use_dma_ppb_du;
 /* buffer fill mode */
 static int use_dma_bufferfill_mode;
 /* full speed only mode */
-static int use_fullspeed;
+static bool use_fullspeed;
 /* tx buffer size for high speed */
 static unsigned long hs_tx_buf = UDC_EPIN_BUFF_SIZE;
 
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index 0cd764d59351..a28f6ffcd0f3 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -250,9 +250,9 @@ static struct usb_configuration rndis_config_driver = {
 /*-------------------------------------------------------------------------*/
 
 #ifdef CONFIG_USB_ETH_EEM
-static int use_eem = 1;
+static bool use_eem = 1;
 #else
-static int use_eem;
+static bool use_eem;
 #endif
 module_param(use_eem, bool, 0);
 MODULE_PARM_DESC(use_eem, "use CDC EEM mode");
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index e0f30fc70e45..47766f0e7caa 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -303,16 +303,16 @@ MODULE_LICENSE("Dual BSD/GPL");
 static struct {
 	char		*file[FSG_MAX_LUNS];
 	char		*serial;
-	int		ro[FSG_MAX_LUNS];
-	int		nofua[FSG_MAX_LUNS];
+	bool		ro[FSG_MAX_LUNS];
+	bool		nofua[FSG_MAX_LUNS];
 	unsigned int	num_filenames;
 	unsigned int	num_ros;
 	unsigned int	num_nofuas;
 	unsigned int	nluns;
 
-	int		removable;
-	int		can_stall;
-	int		cdrom;
+	bool		removable;
+	bool		can_stall;
+	bool		cdrom;
 
 	char		*transport_parm;
 	char		*protocol_parm;
diff --git a/drivers/usb/gadget/net2272.c b/drivers/usb/gadget/net2272.c
index 4c81d540bc26..7322d293213e 100644
--- a/drivers/usb/gadget/net2272.c
+++ b/drivers/usb/gadget/net2272.c
@@ -69,7 +69,7 @@ static const char * const ep_name[] = {
  *
  * If use_dma is disabled, pio will be used instead.
  */
-static int use_dma = 0;
+static bool use_dma = 0;
 module_param(use_dma, bool, 0644);
 
 /*
diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c
index cf1f36454d08..cdedd1336745 100644
--- a/drivers/usb/gadget/net2280.c
+++ b/drivers/usb/gadget/net2280.c
@@ -90,8 +90,8 @@ static const char *const ep_name [] = {
  * Some gadget drivers work better with the dma support here than others.
  * These two parameters let you use PIO or more aggressive DMA.
  */
-static int use_dma = 1;
-static int use_dma_chaining = 0;
+static bool use_dma = 1;
+static bool use_dma_chaining = 0;
 
 /* "modprobe net2280 use_dma=n" etc */
 module_param (use_dma, bool, S_IRUGO);
@@ -112,7 +112,7 @@ module_param (fifo_mode, ushort, 0644);
  * USB suspend requests will be ignored.  This is acceptable for
  * self-powered devices
  */
-static int enable_suspend = 0;
+static bool enable_suspend = 0;
 
 /* "modprobe net2280 enable_suspend=1" etc */
 module_param (enable_suspend, bool, S_IRUGO);
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index 7db5bbe6251b..576cd8578b45 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -98,7 +98,7 @@ module_param (fifo_mode, uint, 0);
 MODULE_PARM_DESC (fifo_mode, "endpoint configuration");
 
 #ifdef	USE_DMA
-static unsigned use_dma = 1;
+static bool use_dma = 1;
 
 /* "modprobe omap_udc use_dma=y", or else as a kernel
  * boot parameter "omap_udc:use_dma=y"
diff --git a/drivers/usb/gadget/pch_udc.c b/drivers/usb/gadget/pch_udc.c
index dd2313cce1d3..a3fcaae4bc2a 100644
--- a/drivers/usb/gadget/pch_udc.c
+++ b/drivers/usb/gadget/pch_udc.c
@@ -359,7 +359,7 @@ struct pch_udc_dev {
 static const char	ep0_string[] = "ep0in";
 static DEFINE_SPINLOCK(udc_stall_spinlock);	/* stall spin lock */
 struct pch_udc_dev *pch_udc;		/* pointer to device object */
-static int speed_fs;
+static bool speed_fs;
 module_param_named(speed_fs, speed_fs, bool, S_IRUGO);
 MODULE_PARM_DESC(speed_fs, "true for Full speed operation");
 
diff --git a/drivers/usb/gadget/serial.c b/drivers/usb/gadget/serial.c
index ed1b816e58d8..ad9e5b2df642 100644
--- a/drivers/usb/gadget/serial.c
+++ b/drivers/usb/gadget/serial.c
@@ -123,11 +123,11 @@ MODULE_AUTHOR("Al Borchers");
 MODULE_AUTHOR("David Brownell");
 MODULE_LICENSE("GPL");
 
-static int use_acm = true;
+static bool use_acm = true;
 module_param(use_acm, bool, 0);
 MODULE_PARM_DESC(use_acm, "Use CDC ACM, default=yes");
 
-static int use_obex = false;
+static bool use_obex = false;
 module_param(use_obex, bool, 0);
 MODULE_PARM_DESC(use_obex, "Use CDC OBEX, default=no");
 
diff --git a/drivers/usb/gadget/zero.c b/drivers/usb/gadget/zero.c
index 20697cc132d1..31d34832907e 100644
--- a/drivers/usb/gadget/zero.c
+++ b/drivers/usb/gadget/zero.c
@@ -81,7 +81,7 @@ module_param(buflen, uint, 0);
  * work better with hosts where config changes are problematic or
  * controllers (like original superh) that only support one config.
  */
-static int loopdefault = 0;
+static bool loopdefault = 0;
 module_param(loopdefault, bool, S_IRUGO|S_IWUSR);
 
 /*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index e311a511529b..a007a9fe0f87 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -112,7 +112,7 @@ module_param (park, uint, S_IRUGO);
 MODULE_PARM_DESC (park, "park setting; 1-3 back-to-back async packets");
 
 /* for flakey hardware, ignore overcurrent indicators */
-static int ignore_oc = 0;
+static bool ignore_oc = 0;
 module_param (ignore_oc, bool, S_IRUGO);
 MODULE_PARM_DESC (ignore_oc, "ignore bogus hardware overcurrent indications");
 
diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index 5f5a63241436..34b9edd86651 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -115,13 +115,13 @@ static inline void sb800_prefetch(struct ohci_hcd *ohci, int on)
 
 
 /* Some boards misreport power switching/overcurrent */
-static int distrust_firmware = 1;
+static bool distrust_firmware = 1;
 module_param (distrust_firmware, bool, 0);
 MODULE_PARM_DESC (distrust_firmware,
 	"true to distrust firmware power/overcurrent setup");
 
 /* Some boards leave IR set wrongly, since they fail BIOS/SMM handshakes */
-static int no_handshake = 0;
+static bool no_handshake = 0;
 module_param (no_handshake, bool, 0);
 MODULE_PARM_DESC (no_handshake, "true (not default) disables BIOS handshake");
 
diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c
index 6f62de5c6e35..015c7c62ed49 100644
--- a/drivers/usb/host/oxu210hp-hcd.c
+++ b/drivers/usb/host/oxu210hp-hcd.c
@@ -233,7 +233,7 @@ module_param(park, uint, S_IRUGO);
 MODULE_PARM_DESC(park, "park setting; 1-3 back-to-back async packets");
 
 /* For flakey hardware, ignore overcurrent indicators */
-static int ignore_oc;
+static bool ignore_oc;
 module_param(ignore_oc, bool, S_IRUGO);
 MODULE_PARM_DESC(ignore_oc, "ignore bogus hardware overcurrent indications");
 
diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c
index 533d12cca371..16dd6a6abf00 100644
--- a/drivers/usb/host/u132-hcd.c
+++ b/drivers/usb/host/u132-hcd.c
@@ -74,7 +74,7 @@ MODULE_LICENSE("GPL");
 #define INT_MODULE_PARM(n, v) static int n = v;module_param(n, int, 0444)
 INT_MODULE_PARM(testing, 0);
 /* Some boards misreport power switching/overcurrent*/
-static int distrust_firmware = 1;
+static bool distrust_firmware = 1;
 module_param(distrust_firmware, bool, 0);
 MODULE_PARM_DESC(distrust_firmware, "true to distrust firmware power/overcurren"
 	"t setup");
diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
index c8ae199cfbb8..6b5eb1017e2c 100644
--- a/drivers/usb/host/uhci-hcd.c
+++ b/drivers/usb/host/uhci-hcd.c
@@ -59,7 +59,7 @@
 #define DRIVER_DESC "USB Universal Host Controller Interface driver"
 
 /* for flakey hardware, ignore overcurrent indicators */
-static int ignore_oc;
+static bool ignore_oc;
 module_param(ignore_oc, bool, S_IRUGO);
 MODULE_PARM_DESC(ignore_oc, "ignore hardware overcurrent indications");
 
diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c
index 2dbe600fbc11..a4a3c7cd4a11 100644
--- a/drivers/usb/misc/ftdi-elan.c
+++ b/drivers/usb/misc/ftdi-elan.c
@@ -53,7 +53,7 @@ MODULE_AUTHOR("Tony Olech");
 MODULE_DESCRIPTION("FTDI ELAN driver");
 MODULE_LICENSE("GPL");
 #define INT_MODULE_PARM(n, v) static int n = v;module_param(n, int, 0444)
-static int distrust_firmware = 1;
+static bool distrust_firmware = 1;
 module_param(distrust_firmware, bool, 0);
 MODULE_PARM_DESC(distrust_firmware, "true to distrust firmware power/overcurren"
         "t setup");
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index 2453a39b4794..4fd0dc835ae5 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -62,7 +62,7 @@ MODULE_LICENSE("GPL");
 
 /* Module parameters */
 static DEFINE_MUTEX(iowarrior_mutex);
-static int debug = 0;
+static bool debug = 0;
 module_param(debug, bool, 0644);
 MODULE_PARM_DESC(debug, "debug=1 enables debugging messages");
 
diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c
index 53be7aef6308..66bc376005d2 100644
--- a/drivers/usb/musb/cppi_dma.c
+++ b/drivers/usb/musb/cppi_dma.c
@@ -750,7 +750,7 @@ cppi_next_tx_segment(struct musb *musb, struct cppi_channel *tx)
  * So this module parameter lets the heuristic be disabled.  When using
  * gadgetfs, the heuristic will probably need to be disabled.
  */
-static int cppi_rx_rndis = 1;
+static bool cppi_rx_rndis = 1;
 
 module_param(cppi_rx_rndis, bool, 0);
 MODULE_PARM_DESC(cppi_rx_rndis, "enable/disable RX RNDIS heuristic");
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index f6ff7923048b..56cf0243979e 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -1586,7 +1586,7 @@ irqreturn_t musb_interrupt(struct musb *musb)
 EXPORT_SYMBOL_GPL(musb_interrupt);
 
 #ifndef CONFIG_MUSB_PIO_ONLY
-static int __initdata use_dma = 1;
+static bool __initdata use_dma = 1;
 
 /* "modprobe ... use_dma=0" etc */
 module_param(use_dma, bool, 0);
diff --git a/drivers/usb/serial/aircable.c b/drivers/usb/serial/aircable.c
index b43d07df4c44..123bf9155339 100644
--- a/drivers/usb/serial/aircable.c
+++ b/drivers/usb/serial/aircable.c
@@ -52,7 +52,7 @@
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
 
-static int debug;
+static bool debug;
 
 /* Vendor and Product ID */
 #define AIRCABLE_VID		0x16CA
diff --git a/drivers/usb/serial/ark3116.c b/drivers/usb/serial/ark3116.c
index 18e875b92e00..69328dcfd91a 100644
--- a/drivers/usb/serial/ark3116.c
+++ b/drivers/usb/serial/ark3116.c
@@ -37,7 +37,7 @@
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 
-static int debug;
+static bool debug;
 /*
  * Version information
  */
diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c
index f9f29b289f2f..29ffeb6279c7 100644
--- a/drivers/usb/serial/belkin_sa.c
+++ b/drivers/usb/serial/belkin_sa.c
@@ -37,7 +37,7 @@
 #include <linux/usb/serial.h>
 #include "belkin_sa.h"
 
-static int debug;
+static bool debug;
 
 /*
  * Version Information
diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index 0e77511060c0..5e53cc59e652 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -70,7 +70,7 @@
 #define CH341_NBREAK_BITS_REG2 0x40
 
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x4348, 0x5523) },
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index adfe660ed008..fba1147ed916 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -49,7 +49,7 @@ static void cp210x_break_ctl(struct tty_struct *, int);
 static int cp210x_startup(struct usb_serial *);
 static void cp210x_dtr_rts(struct usb_serial_port *p, int on);
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x045B, 0x0053) }, /* Renesas RX610 RX-Stick */
diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c
index 98bf83349838..6bc3802a581a 100644
--- a/drivers/usb/serial/cyberjack.c
+++ b/drivers/usb/serial/cyberjack.c
@@ -43,7 +43,7 @@
 
 #define CYBERJACK_LOCAL_BUF_SIZE 32
 
-static int debug;
+static bool debug;
 
 /*
  * Version Information
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index 07680d6b792b..3bdeafa29c24 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -46,10 +46,10 @@
 #include "cypress_m8.h"
 
 
-static int debug;
-static int stats;
+static bool debug;
+static bool stats;
 static int interval;
-static int unstable_bauds;
+static bool unstable_bauds;
 
 /*
  * Version Information
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index 6d26a77d0f2a..b23bebd721a1 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -251,7 +251,7 @@ static int digi_read_oob_callback(struct urb *urb);
 
 /* Statics */
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table_combined[] = {
 	{ USB_DEVICE(DIGI_VENDOR_ID, DIGI_2_ID) },
diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c
index 504b5585ea45..aced6817bf95 100644
--- a/drivers/usb/serial/empeg.c
+++ b/drivers/usb/serial/empeg.c
@@ -28,7 +28,7 @@
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
 
-static int debug;
+static bool debug;
 
 /*
  * Version Information
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index c290df97108e..01b6404df395 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -55,7 +55,7 @@
 #define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>, Bill Ryder <bryder@sgi.com>, Kuba Ober <kuba@mareimbrium.org>, Andreas Mohr, Johan Hovold <jhovold@gmail.com>"
 #define DRIVER_DESC "USB FTDI Serial Converters Driver"
 
-static int debug;
+static bool debug;
 static __u16 vendor = FTDI_VID;
 static __u16 product;
 
diff --git a/drivers/usb/serial/funsoft.c b/drivers/usb/serial/funsoft.c
index e21ce9ddfc63..5d4b099dcf8b 100644
--- a/drivers/usb/serial/funsoft.c
+++ b/drivers/usb/serial/funsoft.c
@@ -16,7 +16,7 @@
 #include <linux/usb/serial.h>
 #include <linux/uaccess.h>
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x1404, 0xcddc) },
diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c
index bf12565f8e87..21343378c322 100644
--- a/drivers/usb/serial/garmin_gps.c
+++ b/drivers/usb/serial/garmin_gps.c
@@ -42,7 +42,7 @@
 static int initial_mode = 1;
 
 /* debug flag */
-static int debug;
+static bool debug;
 
 #define GARMIN_VENDOR_ID             0x091E
 
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index abd2ee2b2f99..0497575e4799 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -191,7 +191,7 @@ static const struct divisor_table_entry divisor_table[] = {
 };
 
 /* local variables */
-static int debug;
+static bool debug;
 
 static atomic_t CmdUrbs;	/* Number of outstanding Command Write Urbs */
 
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index e44d375edaad..65bf06aa591a 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -210,10 +210,10 @@ static unsigned char OperationalMajorVersion;
 static unsigned char OperationalMinorVersion;
 static unsigned short OperationalBuildNumber;
 
-static int debug;
+static bool debug;
 
 static int closing_wait = EDGE_CLOSING_WAIT;
-static int ignore_cpu_rev;
+static bool ignore_cpu_rev;
 static int default_uart_mode;		/* RS232 */
 
 static void edge_tty_recv(struct device *dev, struct tty_struct *tty,
diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c
index 36f5cbe90485..06053a920dd8 100644
--- a/drivers/usb/serial/ipaq.c
+++ b/drivers/usb/serial/ipaq.c
@@ -34,7 +34,7 @@
 #define DRIVER_DESC "USB PocketPC PDA driver"
 
 static __u16 product, vendor;
-static int debug;
+static bool debug;
 static int connect_retries = KP_RETRIES;
 static int initial_wait;
 
diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c
index 5170799d6e94..6f9356f3f99e 100644
--- a/drivers/usb/serial/ipw.c
+++ b/drivers/usb/serial/ipw.c
@@ -147,7 +147,7 @@ static struct usb_driver usb_ipw_driver = {
 	.no_dynamic_id = 	1,
 };
 
-static int debug;
+static bool debug;
 
 static int ipw_open(struct tty_struct *tty, struct usb_serial_port *port)
 {
diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index 0c537da0d3cd..84a396e83671 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -45,7 +45,7 @@
 #define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>, Johan Hovold <jhovold@gmail.com>"
 #define DRIVER_DESC "USB IR Dongle driver"
 
-static int debug;
+static bool debug;
 
 /* if overridden by the user, then use their value for the size of the read and
  * write urbs */
diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 64d0ffd4440b..3077a4436976 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -34,9 +34,9 @@
 
 
 #ifdef CONFIG_USB_SERIAL_DEBUG
-static int debug = 1;
+static bool debug = 1;
 #else
-static int debug;
+static bool debug;
 #endif
 
 /*
@@ -65,7 +65,7 @@ static int clockmode = 1;
 static int cdmode = 1;
 static int iuu_cardin;
 static int iuu_cardout;
-static int xmas;
+static bool xmas;
 static int vcc_default = 5;
 
 static void read_rxcmd_callback(struct urb *urb);
diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
index bc8dc203e818..4cc36c761801 100644
--- a/drivers/usb/serial/keyspan.c
+++ b/drivers/usb/serial/keyspan.c
@@ -45,7 +45,7 @@
 #include <linux/usb/serial.h>
 #include "keyspan.h"
 
-static int debug;
+static bool debug;
 
 /*
  * Version Information
diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index a40615674a68..7c62a7048302 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -31,7 +31,7 @@
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
 
-static int debug;
+static bool debug;
 
 /* make a simple define to handle if we are compiling keyspan_pda or xircom support */
 #if defined(CONFIG_USB_SERIAL_KEYSPAN_PDA) || defined(CONFIG_USB_SERIAL_KEYSPAN_PDA_MODULE)
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index 19373cb7c5bf..fc064e1442ca 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -49,7 +49,7 @@
 #include <linux/usb/serial.h>
 #include "kl5kusb105.h"
 
-static int debug;
+static bool debug;
 
 /*
  * Version Information
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index a975bb80303f..27fa9c8a77b0 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -45,7 +45,7 @@
 #define DRIVER_AUTHOR "Wolfgang Grandegger <wolfgang@ces.ch>"
 #define DRIVER_DESC "Magic Control Technology USB-RS232 converter driver"
 
-static int debug;
+static bool debug;
 
 /*
  * Function prototypes
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index 19d112f51b97..4554ee49e635 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -71,7 +71,7 @@ struct moschip_port {
 	struct urb		*write_urb_pool[NUM_URBS];
 };
 
-static int debug;
+static bool debug;
 
 static struct usb_serial_driver moschip7720_2port_driver;
 
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index 55cfd6265b98..03b5e249e95e 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -263,7 +263,7 @@ struct moschip_port {
 };
 
 
-static int debug;
+static bool debug;
 
 /*
  * mos7840_set_reg_sync
diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c
index 1f00f243c26c..b28f1db0195f 100644
--- a/drivers/usb/serial/navman.c
+++ b/drivers/usb/serial/navman.c
@@ -21,7 +21,7 @@
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x0a99, 0x0001) },	/* Talon Technology device */
diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index 45a8c55881d3..8b8d58a2ac12 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -23,7 +23,7 @@
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
 
-static int debug;
+static bool debug;
 
 /*
  * Version Information
diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c
index 691f57a9d712..262ded9e076b 100644
--- a/drivers/usb/serial/opticon.c
+++ b/drivers/usb/serial/opticon.c
@@ -32,7 +32,7 @@
  * an examples of 1D barcode types are EAN, UPC, Code39, IATA etc.. */
 #define DRIVER_DESC	"Opticon USB barcode to serial driver (1D)"
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x065a, 0x0009) },
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index c96b6b6509fb..420d9857394a 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1234,7 +1234,7 @@ static struct usb_serial_driver option_1port_device = {
 #endif
 };
 
-static int debug;
+static bool debug;
 
 /* per port private data */
 
diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c
index 2161d1c3c089..e287fd32682c 100644
--- a/drivers/usb/serial/oti6858.c
+++ b/drivers/usb/serial/oti6858.c
@@ -74,7 +74,7 @@ static struct usb_driver oti6858_driver = {
 	.no_dynamic_id = 	1,
 };
 
-static int debug;
+static bool debug;
 
 /* requests */
 #define	OTI6858_REQ_GET_STATUS		(USB_DIR_IN | USB_TYPE_VENDOR | 0x00)
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 329295615d06..3d8cda57ce7a 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -36,7 +36,7 @@
  */
 #define DRIVER_DESC "Prolific PL2303 USB to serial adaptor driver"
 
-static int debug;
+static bool debug;
 
 #define PL2303_CLOSING_WAIT	(30*HZ)
 
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index aa9367f5b421..1d5deee3be52 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -22,7 +22,7 @@
 #define DRIVER_AUTHOR "Qualcomm Inc"
 #define DRIVER_DESC "Qualcomm USB Serial driver"
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table[] = {
 	{USB_DEVICE(0x05c6, 0x9211)},	/* Acer Gobi QDL device */
diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c
index a36e2313eed0..d074b3740dcb 100644
--- a/drivers/usb/serial/safe_serial.c
+++ b/drivers/usb/serial/safe_serial.c
@@ -81,9 +81,9 @@
 #define CONFIG_USB_SERIAL_SAFE_PADDED 0
 #endif
 
-static int debug;
-static int safe = 1;
-static int padded = CONFIG_USB_SERIAL_SAFE_PADDED;
+static bool debug;
+static bool safe = 1;
+static bool padded = CONFIG_USB_SERIAL_SAFE_PADDED;
 
 #define DRIVER_VERSION "v0.1"
 #define DRIVER_AUTHOR "sl@lineo.com, tbr@lineo.com, Johan Hovold <jhovold@gmail.com>"
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index f2485429172f..fdae0a4407cb 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -46,8 +46,8 @@
    allocations > PAGE_SIZE and the number of packets in a page
    is an integer 512 is the largest possible packet on EHCI */
 
-static int debug;
-static int nmea;
+static bool debug;
+static bool nmea;
 
 /* Used in interface blacklisting */
 struct sierra_iface_info {
diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c
index 180ea6c7911c..d7f5eee18f00 100644
--- a/drivers/usb/serial/spcp8x5.c
+++ b/drivers/usb/serial/spcp8x5.c
@@ -33,7 +33,7 @@
 #define DRIVER_VERSION	"v0.10"
 #define DRIVER_DESC 	"SPCP8x5 USB to serial adaptor driver"
 
-static int debug;
+static bool debug;
 
 #define SPCP8x5_007_VID		0x04FC
 #define SPCP8x5_007_PID		0x0201
diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c
index 87362e48796e..7697858d8858 100644
--- a/drivers/usb/serial/ssu100.c
+++ b/drivers/usb/serial/ssu100.c
@@ -46,7 +46,7 @@
 #define FULLPWRBIT          0x00000080
 #define NEXT_BOARD_POWER_BIT        0x00000004
 
-static int debug;
+static bool debug;
 
 /* Version Information */
 #define DRIVER_VERSION "v0.1"
diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c
index c70cc012d03f..50651cf4fc61 100644
--- a/drivers/usb/serial/symbolserial.c
+++ b/drivers/usb/serial/symbolserial.c
@@ -20,7 +20,7 @@
 #include <linux/usb/serial.h>
 #include <linux/uaccess.h>
 
-static int debug;
+static bool debug;
 
 static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x05e0, 0x0600) },
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 4af21f46096e..8468eb769a29 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -150,7 +150,7 @@ static int ti_download_firmware(struct ti_device *tdev);
 /* Data */
 
 /* module parameters */
-static int debug;
+static bool debug;
 static int closing_wait = TI_DEFAULT_CLOSING_WAIT;
 static ushort vendor_3410[TI_EXTRA_VID_PID_COUNT];
 static unsigned int vendor_3410_count;
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index ce6c1a65a544..611b206591cb 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -61,7 +61,7 @@ static struct usb_driver usb_serial_driver = {
    drivers depend on it.
 */
 
-static int debug;
+static bool debug;
 /* initially all NULL */
 static struct usb_serial *serial_table[SERIAL_TTY_MINORS];
 static DEFINE_MUTEX(table_lock);
diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c
index d555ca9567b8..c88657dd31c8 100644
--- a/drivers/usb/serial/usb_wwan.c
+++ b/drivers/usb/serial/usb_wwan.c
@@ -37,7 +37,7 @@
 #include <linux/serial.h>
 #include "usb-wwan.h"
 
-static int debug;
+static bool debug;
 
 void usb_wwan_dtr_rts(struct usb_serial_port *port, int on)
 {
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index 1c11959a7d58..210e4b10dc11 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -52,7 +52,7 @@ static int palm_os_4_probe(struct usb_serial *serial,
 					const struct usb_device_id *id);
 
 /* Parameters that may be passed into the module. */
-static int debug;
+static bool debug;
 static __u16 vendor;
 static __u16 product;
 
diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index 11af903cb09f..7e0acf5c8e38 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -36,7 +36,7 @@
 #include <linux/ihex.h>
 #include "whiteheat.h"			/* WhiteHEAT specific commands */
 
-static int debug;
+static bool debug;
 
 #ifndef CMSPAR
 #define CMSPAR 0
diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index 44bdce4242ad..622f12b62a47 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -301,9 +301,9 @@ static struct fb_ops atyfb_ops = {
 	.fb_sync	= atyfb_sync,
 };
 
-static int noaccel;
+static bool noaccel;
 #ifdef CONFIG_MTRR
-static int nomtrr;
+static bool nomtrr;
 #endif
 static int vram;
 static int pll;
diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c
index 150684882ef7..ce1506b75adf 100644
--- a/drivers/video/aty/radeon_base.c
+++ b/drivers/video/aty/radeon_base.c
@@ -263,19 +263,19 @@ static reg_val common_regs[] = {
         
 static char *mode_option;
 static char *monitor_layout;
-static int noaccel = 0;
+static bool noaccel = 0;
 static int default_dynclk = -2;
-static int nomodeset = 0;
-static int ignore_edid = 0;
-static int mirror = 0;
+static bool nomodeset = 0;
+static bool ignore_edid = 0;
+static bool mirror = 0;
 static int panel_yres = 0;
-static int force_dfp = 0;
-static int force_measure_pll = 0;
+static bool force_dfp = 0;
+static bool force_measure_pll = 0;
 #ifdef CONFIG_MTRR
-static int nomtrr = 0;
+static bool nomtrr = 0;
 #endif
-static int force_sleep;
-static int ignore_devlist;
+static bool force_sleep;
+static bool ignore_devlist;
 #ifdef CONFIG_PMAC_BACKLIGHT
 static int backlight = 1;
 #else
diff --git a/drivers/video/cirrusfb.c b/drivers/video/cirrusfb.c
index 6df7c54db0a3..6fb499e7678f 100644
--- a/drivers/video/cirrusfb.c
+++ b/drivers/video/cirrusfb.c
@@ -350,7 +350,7 @@ struct cirrusfb_info {
 	void (*unmap)(struct fb_info *info);
 };
 
-static int noaccel __devinitdata;
+static bool noaccel __devinitdata;
 static char *mode_option __devinitdata = "640x480@60";
 
 /****************************************************************************/
diff --git a/drivers/video/hgafb.c b/drivers/video/hgafb.c
index 4394389caf68..c645f9282650 100644
--- a/drivers/video/hgafb.c
+++ b/drivers/video/hgafb.c
@@ -133,7 +133,7 @@ static struct fb_fix_screeninfo hga_fix __devinitdata = {
 /* Don't assume that tty1 will be the initial current console. */
 static int release_io_port = 0;
 static int release_io_ports = 0;
-static int nologo = 0;
+static bool nologo = 0;
 
 /* -------------------------------------------------------------------------
  *
diff --git a/drivers/video/intelfb/intelfbdrv.c b/drivers/video/intelfb/intelfbdrv.c
index 5ba399991050..c94c91fd8668 100644
--- a/drivers/video/intelfb/intelfbdrv.c
+++ b/drivers/video/intelfb/intelfbdrv.c
@@ -230,15 +230,15 @@ MODULE_DESCRIPTION("Framebuffer driver for Intel(R) " SUPPORTED_CHIPSETS
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DEVICE_TABLE(pci, intelfb_pci_table);
 
-static int accel        = 1;
+static bool accel       = 1;
 static int vram         = 4;
-static int hwcursor     = 0;
-static int mtrr         = 1;
-static int fixed        = 0;
-static int noinit       = 0;
-static int noregister   = 0;
-static int probeonly    = 0;
-static int idonly       = 0;
+static bool hwcursor    = 0;
+static bool mtrr        = 1;
+static bool fixed       = 0;
+static bool noinit      = 0;
+static bool noregister  = 0;
+static bool probeonly   = 0;
+static bool idonly      = 0;
 static int bailearly    = 0;
 static int voffset	= 48;
 static char *mode       = NULL;
diff --git a/drivers/video/logo/logo.c b/drivers/video/logo/logo.c
index ea7a8ccc830c..080c35b34bbb 100644
--- a/drivers/video/logo/logo.c
+++ b/drivers/video/logo/logo.c
@@ -21,7 +21,7 @@
 #include <asm/bootinfo.h>
 #endif
 
-static int nologo;
+static bool nologo;
 module_param(nologo, bool, 0);
 MODULE_PARM_DESC(nologo, "Disables startup logo");
 
diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c
index feea7b1dc386..fb3f67391105 100644
--- a/drivers/video/neofb.c
+++ b/drivers/video/neofb.c
@@ -84,11 +84,11 @@
 
 /* --------------------------------------------------------------------- */
 
-static int internal;
-static int external;
-static int libretto;
-static int nostretch;
-static int nopciburst;
+static bool internal;
+static bool external;
+static bool libretto;
+static bool nostretch;
+static bool nopciburst;
 static char *mode_option __devinitdata = NULL;
 
 #ifdef MODULE
diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c
index 25d8e5103193..b291bfaac80e 100644
--- a/drivers/video/omap/omapfb_main.c
+++ b/drivers/video/omap/omapfb_main.c
@@ -47,9 +47,9 @@ static unsigned int	def_rotate;
 static unsigned int	def_mirror;
 
 #ifdef CONFIG_FB_OMAP_MANUAL_UPDATE
-static int		manual_update = 1;
+static bool		manual_update = 1;
 #else
-static int		manual_update;
+static bool		manual_update;
 #endif
 
 static struct platform_device	*fbdev_pdev;
diff --git a/drivers/video/omap2/dss/core.c b/drivers/video/omap2/dss/core.c
index 86ec12e16c7c..da7b18576549 100644
--- a/drivers/video/omap2/dss/core.c
+++ b/drivers/video/omap2/dss/core.c
@@ -50,7 +50,7 @@ module_param_named(def_disp, def_disp_name, charp, 0);
 MODULE_PARM_DESC(def_disp, "default display name");
 
 #ifdef DEBUG
-unsigned int dss_debug;
+bool dss_debug;
 module_param_named(debug, dss_debug, bool, 0644);
 #endif
 
diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c
index 5abf8e7e7456..46f37883e499 100644
--- a/drivers/video/omap2/dss/dsi.c
+++ b/drivers/video/omap2/dss/dsi.c
@@ -340,8 +340,8 @@ struct dsi_packet_sent_handler_data {
 static struct platform_device *dsi_pdev_map[MAX_NUM_DSI];
 
 #ifdef DEBUG
-static unsigned int dsi_perf;
-module_param_named(dsi_perf, dsi_perf, bool, 0644);
+static bool dsi_perf;
+module_param(dsi_perf, bool, 0644);
 #endif
 
 static inline struct dsi_data *dsi_get_dsidrv_data(struct platform_device *dsidev)
diff --git a/drivers/video/omap2/dss/dss.h b/drivers/video/omap2/dss/dss.h
index 6308fc59fc9e..57a52eecee91 100644
--- a/drivers/video/omap2/dss/dss.h
+++ b/drivers/video/omap2/dss/dss.h
@@ -28,7 +28,7 @@
 #endif
 
 #ifdef DEBUG
-extern unsigned int dss_debug;
+extern bool dss_debug;
 #ifdef DSS_SUBSYS_NAME
 #define DSSDBG(format, ...) \
 	if (dss_debug) \
diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c
index 70aa47de7146..68ba1f800082 100644
--- a/drivers/video/omap2/omapfb/omapfb-main.c
+++ b/drivers/video/omap2/omapfb/omapfb-main.c
@@ -43,18 +43,18 @@
 
 static char *def_mode;
 static char *def_vram;
-static int def_vrfb;
+static bool def_vrfb;
 static int def_rotate;
-static int def_mirror;
+static bool def_mirror;
 static bool auto_update;
 static unsigned int auto_update_freq;
 module_param(auto_update, bool, 0);
 module_param(auto_update_freq, uint, 0644);
 
 #ifdef DEBUG
-unsigned int omapfb_debug;
+bool omapfb_debug;
 module_param_named(debug, omapfb_debug, bool, 0644);
-static unsigned int omapfb_test_pattern;
+static bool omapfb_test_pattern;
 module_param_named(test, omapfb_test_pattern, bool, 0644);
 #endif
 
diff --git a/drivers/video/omap2/omapfb/omapfb.h b/drivers/video/omap2/omapfb/omapfb.h
index fdf0edeccf4e..e12d384ea520 100644
--- a/drivers/video/omap2/omapfb/omapfb.h
+++ b/drivers/video/omap2/omapfb/omapfb.h
@@ -32,7 +32,7 @@
 #include <video/omapdss.h>
 
 #ifdef DEBUG
-extern unsigned int omapfb_debug;
+extern bool omapfb_debug;
 #define DBG(format, ...) \
 	do { \
 		if (omapfb_debug) \
diff --git a/drivers/video/pm2fb.c b/drivers/video/pm2fb.c
index dc7bfa91e57a..df31a24a5026 100644
--- a/drivers/video/pm2fb.c
+++ b/drivers/video/pm2fb.c
@@ -78,12 +78,12 @@ static char *mode_option __devinitdata;
  * these flags allow the user to specify that requests for +ve sync
  * should be silently turned in -ve sync.
  */
-static int lowhsync;
-static int lowvsync;
-static int noaccel __devinitdata;
+static bool lowhsync;
+static bool lowvsync;
+static bool noaccel __devinitdata;
 /* mtrr option */
 #ifdef CONFIG_MTRR
-static int nomtrr __devinitdata;
+static bool nomtrr __devinitdata;
 #endif
 
 /*
diff --git a/drivers/video/pm3fb.c b/drivers/video/pm3fb.c
index 6632ee5ecb7e..055e527a8e45 100644
--- a/drivers/video/pm3fb.c
+++ b/drivers/video/pm3fb.c
@@ -57,11 +57,11 @@
  */
 static int hwcursor = 1;
 static char *mode_option __devinitdata;
-static int noaccel __devinitdata;
+static bool noaccel __devinitdata;
 
 /* mtrr option */
 #ifdef CONFIG_MTRR
-static int nomtrr __devinitdata;
+static bool nomtrr __devinitdata;
 #endif
 
 /*
diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c
index d8ab7be4fd6b..2f58cf9c813b 100644
--- a/drivers/video/riva/fbdev.c
+++ b/drivers/video/riva/fbdev.c
@@ -207,9 +207,9 @@ MODULE_DEVICE_TABLE(pci, rivafb_pci_tbl);
 /* command line data, set in rivafb_setup() */
 static int flatpanel __devinitdata = -1; /* Autodetect later */
 static int forceCRTC __devinitdata = -1;
-static int noaccel   __devinitdata = 0;
+static bool noaccel  __devinitdata = 0;
 #ifdef CONFIG_MTRR
-static int nomtrr __devinitdata = 0;
+static bool nomtrr __devinitdata = 0;
 #endif
 #ifdef CONFIG_PMAC_BACKLIGHT
 static int backlight __devinitdata = 1;
@@ -218,7 +218,7 @@ static int backlight __devinitdata = 0;
 #endif
 
 static char *mode_option __devinitdata = NULL;
-static int  strictmode       = 0;
+static bool strictmode       = 0;
 
 static struct fb_fix_screeninfo __devinitdata rivafb_fix = {
 	.type		= FB_TYPE_PACKED_PIXELS,
diff --git a/drivers/video/smscufx.c b/drivers/video/smscufx.c
index 3c22994ea31a..ccbfef5e828f 100644
--- a/drivers/video/smscufx.c
+++ b/drivers/video/smscufx.c
@@ -130,8 +130,8 @@ static struct usb_device_id id_table[] = {
 MODULE_DEVICE_TABLE(usb, id_table);
 
 /* module options */
-static int console;   /* Optionally allow fbcon to consume first framebuffer */
-static int fb_defio = true;  /* Optionally enable fb_defio mmap support */
+static bool console;   /* Optionally allow fbcon to consume first framebuffer */
+static bool fb_defio = true;  /* Optionally enable fb_defio mmap support */
 
 /* ufx keeps a list of urbs for efficient bulk transfers */
 static void ufx_urb_completion(struct urb *urb);
diff --git a/drivers/video/sstfb.c b/drivers/video/sstfb.c
index 2301c275d63a..111fb32e8769 100644
--- a/drivers/video/sstfb.c
+++ b/drivers/video/sstfb.c
@@ -93,11 +93,11 @@
 
 /* initialized by setup */
 
-static int vgapass;		/* enable VGA passthrough cable */
+static bool vgapass;		/* enable VGA passthrough cable */
 static int mem;			/* mem size in MB, 0 = autodetect */
-static int clipping = 1;	/* use clipping (slower, safer) */
+static bool clipping = 1;	/* use clipping (slower, safer) */
 static int gfxclk;		/* force FBI freq in Mhz . Dangerous */
-static int slowpci;		/* slow PCI settings */
+static bool slowpci;		/* slow PCI settings */
 
 /*
   Possible default video modes: 800x600@60, 640x480@75, 1024x768@76, 640x480@60
diff --git a/drivers/video/tdfxfb.c b/drivers/video/tdfxfb.c
index a99b994c9b6b..e026724a3a56 100644
--- a/drivers/video/tdfxfb.c
+++ b/drivers/video/tdfxfb.c
@@ -169,7 +169,7 @@ static int nowrap = 1;      /* not implemented (yet) */
 static int hwcursor = 1;
 static char *mode_option __devinitdata;
 /* mtrr option */
-static int nomtrr __devinitdata;
+static bool nomtrr __devinitdata;
 
 /* -------------------------------------------------------------------------
  *			Hardware-specific funcions
diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c
index 1f868d0187a2..a19773149bd7 100644
--- a/drivers/video/udlfb.c
+++ b/drivers/video/udlfb.c
@@ -69,9 +69,9 @@ static struct usb_device_id id_table[] = {
 MODULE_DEVICE_TABLE(usb, id_table);
 
 /* module options */
-static int console = 1; /* Allow fbcon to open framebuffer */
-static int fb_defio = 1;  /* Detect mmap writes using page faults */
-static int shadow = 1; /* Optionally disable shadow framebuffer */
+static bool console = 1; /* Allow fbcon to open framebuffer */
+static bool fb_defio = 1;  /* Detect mmap writes using page faults */
+static bool shadow = 1; /* Optionally disable shadow framebuffer */
 
 /* dlfb keeps a list of urbs for efficient bulk transfers */
 static void dlfb_urb_completion(struct urb *urb);
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index 7f8472cc993b..e7f69ef572dc 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -44,11 +44,11 @@ static struct fb_fix_screeninfo uvesafb_fix __devinitdata = {
 };
 
 static int mtrr		__devinitdata = 3; /* enable mtrr by default */
-static int blank	= 1;		   /* enable blanking by default */
+static bool blank	= 1;		   /* enable blanking by default */
 static int ypan		= 1; 		 /* 0: scroll, 1: ypan, 2: ywrap */
 static bool pmi_setpal	__devinitdata = true; /* use PMI for palette changes */
-static int nocrtc	__devinitdata; /* ignore CRTC settings */
-static int noedid	__devinitdata; /* don't try DDC transfers */
+static bool nocrtc	__devinitdata; /* ignore CRTC settings */
+static bool noedid	__devinitdata; /* don't try DDC transfers */
 static int vram_remap	__devinitdata; /* set amt. of memory to be used */
 static int vram_total	__devinitdata; /* set total amount of memory */
 static u16 maxclk	__devinitdata; /* maximum pixel clock */
diff --git a/drivers/video/vfb.c b/drivers/video/vfb.c
index bf2f78065cf9..501a922aa9dc 100644
--- a/drivers/video/vfb.c
+++ b/drivers/video/vfb.c
@@ -110,7 +110,7 @@ static struct fb_fix_screeninfo vfb_fix __devinitdata = {
 	.accel =	FB_ACCEL_NONE,
 };
 
-static int vfb_enable __initdata = 0;	/* disabled by default */
+static bool vfb_enable __initdata = 0;	/* disabled by default */
 module_param(vfb_enable, bool, 0);
 
 static int vfb_check_var(struct fb_var_screeninfo *var,
diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c
index d4d8d1fdccc4..e45ca2b4bfbe 100644
--- a/drivers/watchdog/f71808e_wdt.c
+++ b/drivers/watchdog/f71808e_wdt.c
@@ -100,7 +100,7 @@ MODULE_PARM_DESC(f71862fg_pin,
 	"Watchdog f71862fg reset output pin configuration. Choose pin 56 or 63"
 			" (default=" __MODULE_STRING(WATCHDOG_F71862FG_PIN)")");
 
-static int nowayout = WATCHDOG_NOWAYOUT;
+static bool nowayout = WATCHDOG_NOWAYOUT;
 module_param(nowayout, bool, 0444);
 MODULE_PARM_DESC(nowayout, "Disable watchdog shutdown on close");
 
diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c
index eed5436ffb51..20feb4d3d791 100644
--- a/drivers/watchdog/mpc8xxx_wdt.c
+++ b/drivers/watchdog/mpc8xxx_wdt.c
@@ -55,7 +55,7 @@ module_param(timeout, ushort, 0);
 MODULE_PARM_DESC(timeout,
 	"Watchdog timeout in ticks. (0<timeout<65536, default=65535)");
 
-static int reset = 1;
+static bool reset = 1;
 module_param(reset, bool, 0);
 MODULE_PARM_DESC(reset,
 	"Watchdog Interrupt/Reset Mode. 0 = interrupt, 1 = reset");
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index 52fed16d8701..30d7be026c18 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -16,7 +16,7 @@
 #include "conf_space.h"
 #include "conf_space_quirks.h"
 
-static int permissive;
+static bool permissive;
 module_param(permissive, bool, 0644);
 
 /* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word,
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 8e1c44d8ab46..d5dcf8d5d3d9 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -16,7 +16,7 @@
 #define INVALID_EVTCHN_IRQ  (-1)
 struct workqueue_struct *xen_pcibk_wq;
 
-static int __read_mostly passthrough;
+static bool __read_mostly passthrough;
 module_param(passthrough, bool, S_IRUGO);
 MODULE_PARM_DESC(passthrough,
 	"Option to specify how to export PCI topology to guest:\n"\
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 23d7451b2938..65ba36b80a9e 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -55,7 +55,7 @@ static				DEFINE_SPINLOCK(nsm_lock);
  * Local NSM state
  */
 u32	__read_mostly		nsm_local_state;
-int	__read_mostly		nsm_use_hostnames;
+bool	__read_mostly		nsm_use_hostnames;
 
 static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
 {
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 277dfaf2e99a..31778f74357d 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -84,7 +84,7 @@ retry:
 /*
  * Turn off NFSv4 uid/gid mapping when using AUTH_SYS
  */
-static int nfs4_disable_idmapping = 1;
+static bool nfs4_disable_idmapping = true;
 
 /*
  * RPC cruft for NFS
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 25c3bfad7953..f649fba8c384 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -57,7 +57,7 @@
 #define NFS_64_BIT_INODE_NUMBERS_ENABLED	1
 
 /* Default is to see 64-bit inode numbers */
-static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
+static bool enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED;
 
 static void nfs_invalidate_inode(struct inode *);
 static int nfs_update_inode(struct inode *, struct nfs_fattr *);
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index f554a9313b43..7762bc2d8404 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -66,7 +66,7 @@ extern u8 acpi_gbl_create_osi_method;
 extern u8 acpi_gbl_use_default_register_widths;
 extern acpi_name acpi_gbl_trace_method_name;
 extern u32 acpi_gbl_trace_flags;
-extern u32 acpi_gbl_enable_aml_debug_object;
+extern bool acpi_gbl_enable_aml_debug_object;
 extern u8 acpi_gbl_copy_dsdt_locally;
 extern u8 acpi_gbl_truncate_io_addresses;
 extern u8 acpi_gbl_disable_auto_repair;
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index 51a527d24a8a..04f349d8da73 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -16,10 +16,10 @@
 
 #ifdef __KERNEL__
 
-extern int hest_disable;
+extern bool hest_disable;
 extern int erst_disable;
 #ifdef CONFIG_ACPI_APEI_GHES
-extern int ghes_disable;
+extern bool ghes_disable;
 #else
 #define ghes_disable 1
 #endif
diff --git a/include/linux/console.h b/include/linux/console.h
index 7453cfd593c8..7201ce4280ca 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -152,7 +152,7 @@ extern int braille_register_console(struct console *, int index,
 		char *console_options, char *braille_options);
 extern int braille_unregister_console(struct console *);
 extern void console_sysfs_notify(void);
-extern int console_suspend_enabled;
+extern bool console_suspend_enabled;
 
 /* Suspend and resume console messages over PM events */
 extern void suspend_console(void);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 90b0656a869e..88a114fce477 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -195,7 +195,7 @@ extern struct svc_procedure	nlmsvc_procedures4[];
 #endif
 extern int			nlmsvc_grace_period;
 extern unsigned long		nlmsvc_timeout;
-extern int			nsm_use_hostnames;
+extern bool			nsm_use_hostnames;
 extern u32			nsm_local_state;
 
 /*
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index a3ac9c48e5de..8ef7894a48d0 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -396,7 +396,7 @@ static inline void mmc_set_disable_delay(struct mmc_host *host,
 }
 
 /* Module parameter */
-extern int mmc_assume_removable;
+extern bool mmc_assume_removable;
 
 static inline int mmc_card_is_removable(struct mmc_host *host)
 {
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index 38ccaea08204..df3649560818 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -21,11 +21,11 @@
 
 /* Control parameters settable through module/boot flags */
 extern enum audit_mode aa_g_audit;
-extern int aa_g_audit_header;
-extern int aa_g_debug;
-extern int aa_g_lock_policy;
-extern int aa_g_logsyscall;
-extern int aa_g_paranoid_load;
+extern bool aa_g_audit_header;
+extern bool aa_g_debug;
+extern bool aa_g_lock_policy;
+extern bool aa_g_logsyscall;
+extern bool aa_g_paranoid_load;
 extern unsigned int aa_g_path_max;
 
 /*
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index d7f06f8b2837..68d50c54e431 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -708,7 +708,7 @@ module_param_call(mode, param_set_mode, param_get_mode,
 		  &aa_g_profile_mode, S_IRUSR | S_IWUSR);
 
 /* Debug mode */
-int aa_g_debug;
+bool aa_g_debug;
 module_param_named(debug, aa_g_debug, aabool, S_IRUSR | S_IWUSR);
 
 /* Audit mode */
@@ -719,7 +719,7 @@ module_param_call(audit, param_set_audit, param_get_audit,
 /* Determines if audit header is included in audited messages.  This
  * provides more context if the audit daemon is not running
  */
-int aa_g_audit_header = 1;
+bool aa_g_audit_header = 1;
 module_param_named(audit_header, aa_g_audit_header, aabool,
 		   S_IRUSR | S_IWUSR);
 
@@ -727,12 +727,12 @@ module_param_named(audit_header, aa_g_audit_header, aabool,
  * TODO: add in at boot loading of policy, which is the only way to
  *       load policy, if lock_policy is set
  */
-int aa_g_lock_policy;
+bool aa_g_lock_policy;
 module_param_named(lock_policy, aa_g_lock_policy, aalockpolicy,
 		   S_IRUSR | S_IWUSR);
 
 /* Syscall logging mode */
-int aa_g_logsyscall;
+bool aa_g_logsyscall;
 module_param_named(logsyscall, aa_g_logsyscall, aabool, S_IRUSR | S_IWUSR);
 
 /* Maximum pathname length before accesses will start getting rejected */
@@ -742,12 +742,12 @@ module_param_named(path_max, aa_g_path_max, aauint, S_IRUSR | S_IWUSR);
 /* Determines how paranoid loading of policy is and how much verification
  * on the loaded policy is done.
  */
-int aa_g_paranoid_load = 1;
+bool aa_g_paranoid_load = 1;
 module_param_named(paranoid_load, aa_g_paranoid_load, aabool,
 		   S_IRUSR | S_IWUSR);
 
 /* Boot time disable flag */
-static unsigned int apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE;
+static bool apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE;
 module_param_named(enabled, apparmor_enabled, aabool, S_IRUSR);
 
 static int __init apparmor_enabled_setup(char *str)
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 0fb448e6a1a3..a457d2138f49 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -32,7 +32,7 @@
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
-static int allow_unsafe_assigned_interrupts;
+static bool allow_unsafe_assigned_interrupts;
 module_param_named(allow_unsafe_assigned_interrupts,
 		   allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
-- 
cgit v1.2.3


From 72db395ffadb1d33233fd123c2bf87ba0198c6c1 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 13 Jan 2012 09:32:28 +1030
Subject: module_param: check that bool parameters really are bool.

module_param(bool) used to counter-intuitively take an int.  In
fddd5201 (mid-2009) we allowed bool or int/unsigned int using a messy
trick.

This tightens the check (you'll get a warning about incompatible
return type) but still allows it.  Next kernel version, we'll remove
it.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleparam.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 6bdde0c3bcca..c47f4d60db0b 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -350,17 +350,11 @@ extern int param_set_charp(const char *val, const struct kernel_param *kp);
 extern int param_get_charp(char *buffer, const struct kernel_param *kp);
 #define param_check_charp(name, p) __param_check(name, p, char *)
 
-/* For historical reasons "bool" parameters can be (unsigned) "int". */
+/* We used to allow int as well as bool.  We're taking that away! */
 extern struct kernel_param_ops param_ops_bool;
 extern int param_set_bool(const char *val, const struct kernel_param *kp);
 extern int param_get_bool(char *buffer, const struct kernel_param *kp);
-#define param_check_bool(name, p)					\
-	static inline void __check_##name(void)				\
-	{								\
-		BUILD_BUG_ON(!__same_type((p), bool *) &&		\
-			     !__same_type((p), unsigned int *) &&	\
-			     !__same_type((p), int *));			\
-	}
+#define param_check_bool(name, p) __param_check(name, p, bool)
 
 extern struct kernel_param_ops param_ops_invbool;
 extern int param_set_invbool(const char *val, const struct kernel_param *kp);
-- 
cgit v1.2.3


From eb8a54a78e974e1af3e17fa38bb74d3747c5c1bd Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Thu, 12 Jan 2012 15:23:04 -0800
Subject: phylib: introduce mdiobus_alloc_size()

Introduce function mdiobus_alloc_size() as an alternative to mdiobus_alloc().
Most callers of mdiobus_alloc() also allocate a private data structure, and
then manually point bus->priv to this object.  mdiobus_alloc_size()
combines the two operations into one, which simplifies memory management.

The original mdiobus_alloc() now just calls mdiobus_alloc_size(0).

Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c | 24 +++++++++++++++++++-----
 include/linux/phy.h        |  7 ++++++-
 2 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 6c58da2b882c..88cc5db9affd 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -37,22 +37,36 @@
 #include <asm/uaccess.h>
 
 /**
- * mdiobus_alloc - allocate a mii_bus structure
+ * mdiobus_alloc_size - allocate a mii_bus structure
  *
  * Description: called by a bus driver to allocate an mii_bus
  * structure to fill in.
+ *
+ * 'size' is an extra amount of memory to allocate for private storage.
+ * If non-zero, then bus->priv points to that memory.
  */
-struct mii_bus *mdiobus_alloc(void)
+struct mii_bus *mdiobus_alloc_size(size_t size)
 {
 	struct mii_bus *bus;
+	size_t aligned_size = ALIGN(sizeof(*bus), NETDEV_ALIGN);
+	size_t alloc_size;
+
+	/* If we alloc extra space, it should be aligned */
+	if (size)
+		alloc_size = aligned_size + size;
+	else
+		alloc_size = sizeof(*bus);
 
-	bus = kzalloc(sizeof(*bus), GFP_KERNEL);
-	if (bus != NULL)
+	bus = kzalloc(alloc_size, GFP_KERNEL);
+	if (bus) {
 		bus->state = MDIOBUS_ALLOCATED;
+		if (size)
+			bus->priv = (void *)bus + aligned_size;
+	}
 
 	return bus;
 }
-EXPORT_SYMBOL(mdiobus_alloc);
+EXPORT_SYMBOL(mdiobus_alloc_size);
 
 /**
  * mdiobus_release - mii_bus device release callback
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 79f337c47388..c599f7eca1e7 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -129,7 +129,12 @@ struct mii_bus {
 };
 #define to_mii_bus(d) container_of(d, struct mii_bus, dev)
 
-struct mii_bus *mdiobus_alloc(void);
+struct mii_bus *mdiobus_alloc_size(size_t);
+static inline struct mii_bus *mdiobus_alloc(void)
+{
+	return mdiobus_alloc_size(0);
+}
+
 int mdiobus_register(struct mii_bus *bus);
 void mdiobus_unregister(struct mii_bus *bus);
 void mdiobus_free(struct mii_bus *bus);
-- 
cgit v1.2.3


From ddecf0f4db44ef94847a62d6ecf74456b4dcc66f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 6 Jan 2012 06:31:44 +0000
Subject: net_sched: sfq: add optional RED on top of SFQ

Adds an optional Random Early Detection on each SFQ flow queue.

Traditional SFQ limits the count of packets, while RED also makes it
possible to control the number of bytes per flow, and adds ECN capability.

1) We don't handle the idle time management in this RED implementation,
since each 'new flow' begins with a null qavg. We really want to address
backlogged flows.

2) if headdrop is selected, we try to ecn mark first packet instead of
currently enqueued packet. This gives faster feedback for tcp flows
compared to traditional RED [ marking the last packet in queue ]

Example of use :

tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 4sec sfq \
	limit 3000 headdrop flows 512 divisor 16384 \
	redflowlimit 100000 min 8000 max 60000 probability 0.20 ecn

qdisc sfq 10: parent 1:1 limit 3000p quantum 1514b depth 127 headdrop
flows 512/16384 divisor 16384
 ewma 6 min 8000b max 60000b probability 0.2 ecn
 prob_mark 0 prob_mark_head 4876 prob_drop 6131
 forced_mark 0 forced_mark_head 0 forced_drop 0
 Sent 1175211782 bytes 777537 pkt (dropped 6131, overlimits 11007
requeues 0)
 rate 99483Kbit 8219pps backlog 689392b 456p requeues 0

In this test, with 64 netperf TCP_STREAM sessions, 50% using ECN enabled
flows, we can see number of packets CE marked is smaller than number of
drops (for non ECN flows)

If same test is run, without RED, we can check backlog is much bigger.

qdisc sfq 10: parent 1:1 limit 3000p quantum 1514b depth 127 headdrop
flows 512/16384 divisor 16384
 Sent 1148683617 bytes 795006 pkt (dropped 0, overlimits 0 requeues 0)
 rate 98429Kbit 8521pps backlog 1221290b 841p requeues 0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Stephen Hemminger <shemminger@vyatta.com>
CC: Dave Taht <dave.taht@gmail.com>
Tested-by: Dave Taht <dave.taht@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h |  20 +++++++
 include/net/red.h         |   3 +-
 net/sched/sch_sfq.c       | 146 +++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 152 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 8f1b928f777c..0d5b79365d03 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -162,10 +162,30 @@ struct tc_sfq_qopt {
 	unsigned	flows;		/* Maximal number of flows  */
 };
 
+struct tc_sfqred_stats {
+	__u32           prob_drop;      /* Early drops, below max threshold */
+	__u32           forced_drop;	/* Early drops, after max threshold */
+	__u32           prob_mark;      /* Marked packets, below max threshold */
+	__u32           forced_mark;    /* Marked packets, after max threshold */
+	__u32           prob_mark_head; /* Marked head packets, below max threshold */
+	__u32           forced_mark_head;/* Marked head packets, after max threshold */
+};
+
 struct tc_sfq_qopt_v1 {
 	struct tc_sfq_qopt v0;
 	unsigned int	depth;		/* max number of packets per flow */
 	unsigned int	headdrop;
+/* SFQRED parameters */
+	__u32		limit;		/* HARD maximal flow queue length (bytes) */
+	__u32		qth_min;	/* Min average length threshold (bytes) */
+	__u32		qth_max;	/* Max average length threshold (bytes) */
+	unsigned char   Wlog;		/* log(W)		*/
+	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
+	unsigned char   Scell_log;	/* cell size for idle damping */
+	unsigned char	flags;
+	__u32		max_P;		/* probability, high resolution */
+/* SFQRED stats */
+	struct tc_sfqred_stats stats;
 };
 
 
diff --git a/include/net/red.h b/include/net/red.h
index baab385a4736..28068ec614b2 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -199,7 +199,8 @@ static inline void red_set_parms(struct red_parms *p,
 	p->Scell_log	= Scell_log;
 	p->Scell_max	= (255 << Scell_log);
 
-	memcpy(p->Stab, stab, sizeof(p->Stab));
+	if (stab)
+		memcpy(p->Stab, stab, sizeof(p->Stab));
 }
 
 static inline int red_is_idling(const struct red_vars *v)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 0a7964009e8c..67494aef9acf 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -24,6 +24,7 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/flow_keys.h>
+#include <net/red.h>
 
 
 /*	Stochastic Fairness Queuing algorithm.
@@ -108,24 +109,30 @@ struct sfq_slot {
 	struct sfq_head dep; /* anchor in dep[] chains */
 	unsigned short	hash; /* hash value (index in ht[]) */
 	short		allot; /* credit for this slot */
+
+	unsigned int    backlog;
+	struct red_vars vars;
 };
 
 struct sfq_sched_data {
 /* frequently used fields */
 	int		limit;		/* limit of total number of packets in this qdisc */
 	unsigned int	divisor;	/* number of slots in hash table */
-	unsigned int	maxflows;	/* number of flows in flows array */
-	int		headdrop;
-	int		maxdepth;	/* limit of packets per flow */
+	u8		headdrop;
+	u8		maxdepth;	/* limit of packets per flow */
 
 	u32		perturbation;
-	struct tcf_proto *filter_list;
-	sfq_index	cur_depth;	/* depth of longest slot */
+	u8		cur_depth;	/* depth of longest slot */
+	u8		flags;
 	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
-	struct sfq_slot *tail;		/* current slot in round */
+	struct tcf_proto *filter_list;
 	sfq_index	*ht;		/* Hash table ('divisor' slots) */
 	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */
 
+	struct red_parms *red_parms;
+	struct tc_sfqred_stats stats;
+	struct sfq_slot *tail;		/* current slot in round */
+
 	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
 					/* Linked lists of slots, indexed by depth
 					 * dep[0] : list of unused flows
@@ -133,6 +140,7 @@ struct sfq_sched_data {
 					 * dep[X] : list of flows with X packets
 					 */
 
+	unsigned int	maxflows;	/* number of flows in flows array */
 	int		perturb_period;
 	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
 	struct timer_list perturb_timer;
@@ -321,6 +329,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 drop:
 		skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
 		len = qdisc_pkt_len(skb);
+		slot->backlog -= len;
 		sfq_dec(q, x);
 		kfree_skb(skb);
 		sch->q.qlen--;
@@ -341,6 +350,23 @@ drop:
 	return 0;
 }
 
+/* Is ECN parameter configured */
+static int sfq_prob_mark(const struct sfq_sched_data *q)
+{
+	return q->flags & TC_RED_ECN;
+}
+
+/* Should packets over max threshold just be marked */
+static int sfq_hard_mark(const struct sfq_sched_data *q)
+{
+	return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
+}
+
+static int sfq_headdrop(const struct sfq_sched_data *q)
+{
+	return q->headdrop;
+}
+
 static int
 sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
@@ -349,6 +375,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	sfq_index x, qlen;
 	struct sfq_slot *slot;
 	int uninitialized_var(ret);
+	struct sk_buff *head;
+	int delta;
 
 	hash = sfq_classify(skb, sch, &ret);
 	if (hash == 0) {
@@ -368,24 +396,75 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->ht[hash] = x;
 		slot = &q->slots[x];
 		slot->hash = hash;
+		slot->backlog = 0; /* should already be 0 anyway... */
+		red_set_vars(&slot->vars);
+		goto enqueue;
 	}
+	if (q->red_parms) {
+		slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
+							&slot->vars,
+							slot->backlog);
+		switch (red_action(q->red_parms,
+				   &slot->vars,
+				   slot->vars.qavg)) {
+		case RED_DONT_MARK:
+			break;
 
-	if (slot->qlen >= q->maxdepth) {
-		struct sk_buff *head;
+		case RED_PROB_MARK:
+			sch->qstats.overlimits++;
+			if (sfq_prob_mark(q)) {
+				/* We know we have at least one packet in queue */
+				if (sfq_headdrop(q) &&
+				    INET_ECN_set_ce(slot->skblist_next)) {
+					q->stats.prob_mark_head++;
+					break;
+				}
+				if (INET_ECN_set_ce(skb)) {
+					q->stats.prob_mark++;
+					break;
+				}
+			}
+			q->stats.prob_drop++;
+			goto congestion_drop;
+
+		case RED_HARD_MARK:
+			sch->qstats.overlimits++;
+			if (sfq_hard_mark(q)) {
+				/* We know we have at least one packet in queue */
+				if (sfq_headdrop(q) &&
+				    INET_ECN_set_ce(slot->skblist_next)) {
+					q->stats.forced_mark_head++;
+					break;
+				}
+				if (INET_ECN_set_ce(skb)) {
+					q->stats.forced_mark++;
+					break;
+				}
+			}
+			q->stats.forced_drop++;
+			goto congestion_drop;
+		}
+	}
 
-		if (!q->headdrop)
+	if (slot->qlen >= q->maxdepth) {
+congestion_drop:
+		if (!sfq_headdrop(q))
 			return qdisc_drop(skb, sch);
 
+		/* We know we have at least one packet in queue */
 		head = slot_dequeue_head(slot);
-		sch->qstats.backlog -= qdisc_pkt_len(head);
+		delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
+		sch->qstats.backlog -= delta;
+		slot->backlog -= delta;
 		qdisc_drop(head, sch);
 
-		sch->qstats.backlog += qdisc_pkt_len(skb);
 		slot_queue_add(slot, skb);
 		return NET_XMIT_CN;
 	}
 
+enqueue:
 	sch->qstats.backlog += qdisc_pkt_len(skb);
+	slot->backlog += qdisc_pkt_len(skb);
 	slot_queue_add(slot, skb);
 	sfq_inc(q, x);
 	if (slot->qlen == 1) {		/* The flow is new */
@@ -396,6 +475,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			slot->next = q->tail->next;
 			q->tail->next = x;
 		}
+		/* We could use a bigger initial quantum for new flows */
 		slot->allot = q->scaled_quantum;
 	}
 	if (++sch->q.qlen <= q->limit)
@@ -439,7 +519,7 @@ next_slot:
 	qdisc_bstats_update(sch, skb);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
-
+	slot->backlog -= qdisc_pkt_len(skb);
 	/* Is the slot empty? */
 	if (slot->qlen == 0) {
 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
@@ -490,6 +570,8 @@ static void sfq_rehash(struct Qdisc *sch)
 			sfq_dec(q, i);
 			__skb_queue_tail(&list, skb);
 		}
+		slot->backlog = 0;
+		red_set_vars(&slot->vars);
 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
 	}
 	q->tail = NULL;
@@ -514,6 +596,11 @@ drop:				sch->qstats.backlog -= qdisc_pkt_len(skb);
 		if (slot->qlen >= q->maxdepth)
 			goto drop;
 		slot_queue_add(slot, skb);
+		if (q->red_parms)
+			slot->vars.qavg = red_calc_qavg(q->red_parms,
+							&slot->vars,
+							slot->backlog);
+		slot->backlog += qdisc_pkt_len(skb);
 		sfq_inc(q, x);
 		if (slot->qlen == 1) {		/* The flow is new */
 			if (q->tail == NULL) {	/* It is the first flow */
@@ -552,6 +639,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	struct tc_sfq_qopt *ctl = nla_data(opt);
 	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
 	unsigned int qlen;
+	struct red_parms *p = NULL;
 
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
@@ -560,7 +648,11 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	if (ctl->divisor &&
 	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
 		return -EINVAL;
-
+	if (ctl_v1 && ctl_v1->qth_min) {
+		p = kmalloc(sizeof(*p), GFP_KERNEL);
+		if (!p)
+			return -ENOMEM;
+	}
 	sch_tree_lock(sch);
 	if (ctl->quantum) {
 		q->quantum = ctl->quantum;
@@ -576,6 +668,16 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	if (ctl_v1) {
 		if (ctl_v1->depth)
 			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
+		if (p) {
+			swap(q->red_parms, p);
+			red_set_parms(q->red_parms,
+				      ctl_v1->qth_min, ctl_v1->qth_max,
+				      ctl_v1->Wlog,
+				      ctl_v1->Plog, ctl_v1->Scell_log,
+				      NULL,
+				      ctl_v1->max_P);
+		}
+		q->flags = ctl_v1->flags;
 		q->headdrop = ctl_v1->headdrop;
 	}
 	if (ctl->limit) {
@@ -594,6 +696,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 		q->perturbation = net_random();
 	}
 	sch_tree_unlock(sch);
+	kfree(p);
 	return 0;
 }
 
@@ -625,6 +728,7 @@ static void sfq_destroy(struct Qdisc *sch)
 	del_timer_sync(&q->perturb_timer);
 	sfq_free(q->ht);
 	sfq_free(q->slots);
+	kfree(q->red_parms);
 }
 
 static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
@@ -683,6 +787,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_sfq_qopt_v1 opt;
+	struct red_parms *p = q->red_parms;
 
 	memset(&opt, 0, sizeof(opt));
 	opt.v0.quantum	= q->quantum;
@@ -693,6 +798,17 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	opt.depth	= q->maxdepth;
 	opt.headdrop	= q->headdrop;
 
+	if (p) {
+		opt.qth_min	= p->qth_min >> p->Wlog;
+		opt.qth_max	= p->qth_max >> p->Wlog;
+		opt.Wlog	= p->Wlog;
+		opt.Plog	= p->Plog;
+		opt.Scell_log	= p->Scell_log;
+		opt.max_P	= p->max_P;
+	}
+	memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
+	opt.flags	= q->flags;
+
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
 	return skb->len;
@@ -747,15 +863,13 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	sfq_index idx = q->ht[cl - 1];
 	struct gnet_stats_queue qs = { 0 };
 	struct tc_sfq_xstats xstats = { 0 };
-	struct sk_buff *skb;
 
 	if (idx != SFQ_EMPTY_SLOT) {
 		const struct sfq_slot *slot = &q->slots[idx];
 
 		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
 		qs.qlen = slot->qlen;
-		slot_queue_walk(slot, skb)
-			qs.backlog += qdisc_pkt_len(skb);
+		qs.backlog = slot->backlog;
 	}
 	if (gnet_stats_copy_queue(d, &qs) < 0)
 		return -1;
-- 
cgit v1.2.3


From 4da47859956cebdc4c58c38a931e21847458d744 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 12 Jan 2012 17:17:13 -0800
Subject: kernel.h: neaten panic prototype

Use __printf macro.
Convert NORET_AND to ATTRIB_NORET.
Use the normal kernel style for pointer arguments.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d0a7a0c71661..60934395e36c 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -185,8 +185,9 @@ static inline void might_fault(void)
 
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
-NORET_TYPE void panic(const char * fmt, ...)
-	__attribute__ ((NORET_AND format (printf, 1, 2))) __cold;
+NORET_TYPE __printf(1, 2)
+void panic(const char *fmt, ...)
+	ATTRIB_NORET __cold;
 extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
-- 
cgit v1.2.3


From 80bf007f20b16272f210e0803f739f5606cff59d Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 12 Jan 2012 17:17:14 -0800
Subject: include/linux/linkage.h: remove unused NORET_AND macro

The only use in kernel.h is gone so remove the macro.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/linkage.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 3f46aedea42f..c75074cb8ad4 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -90,6 +90,5 @@
 
 #define NORET_TYPE    /**/
 #define ATTRIB_NORET  __attribute__((noreturn))
-#define NORET_AND     noreturn,
 
 #endif
-- 
cgit v1.2.3


From 9402c95f34a66e81eba473a2f7267bbae5a1dee2 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 12 Jan 2012 17:17:17 -0800
Subject: treewide: remove useless NORET_TYPE macro and uses

It's a very old and now unused prototype marking so just delete it.

Neaten panic pointer argument style to keep checkpatch quiet.

Signed-off-by: Joe Perches <joe@perches.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/avr32/include/asm/system.h        | 2 +-
 arch/avr32/kernel/traps.c              | 2 +-
 arch/ia64/kernel/machine_kexec.c       | 2 +-
 arch/m68k/amiga/config.c               | 2 +-
 arch/mips/include/asm/ptrace.h         | 2 +-
 arch/mips/kernel/traps.c               | 2 +-
 arch/powerpc/kernel/machine_kexec_32.c | 2 +-
 arch/powerpc/kernel/machine_kexec_64.c | 6 +++---
 arch/s390/kernel/nmi.c                 | 2 +-
 arch/tile/kernel/machine_kexec.c       | 4 ++--
 include/linux/kernel.h                 | 6 +++---
 include/linux/linkage.h                | 1 -
 include/linux/sched.h                  | 2 +-
 kernel/exit.c                          | 6 +++---
 kernel/panic.c                         | 2 +-
 15 files changed, 21 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/avr32/include/asm/system.h b/arch/avr32/include/asm/system.h
index 9702c2213e1e..62d9ded01635 100644
--- a/arch/avr32/include/asm/system.h
+++ b/arch/avr32/include/asm/system.h
@@ -169,7 +169,7 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
 #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
 
 struct pt_regs;
-void NORET_TYPE die(const char *str, struct pt_regs *regs, long err);
+void die(const char *str, struct pt_regs *regs, long err);
 void _exception(long signr, struct pt_regs *regs, int code,
 		unsigned long addr);
 
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
index 7aa25756412f..3d760c06f024 100644
--- a/arch/avr32/kernel/traps.c
+++ b/arch/avr32/kernel/traps.c
@@ -24,7 +24,7 @@
 
 static DEFINE_SPINLOCK(die_lock);
 
-void NORET_TYPE die(const char *str, struct pt_regs *regs, long err)
+void die(const char *str, struct pt_regs *regs, long err)
 {
 	static int die_counter;
 
diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 3d3aeef46947..581a16d5e85b 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -27,7 +27,7 @@
 #include <asm/sal.h>
 #include <asm/mca.h>
 
-typedef NORET_TYPE void (*relocate_new_kernel_t)(
+typedef void (*relocate_new_kernel_t)(
 					unsigned long indirection_page,
 					unsigned long start_address,
 					struct ia64_boot_param *boot_param,
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 82a4bb51d5d8..a3b0558328b6 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -511,7 +511,7 @@ static unsigned long amiga_gettimeoffset(void)
 	return ticks + offset;
 }
 
-static NORET_TYPE void amiga_reset(void)
+static void amiga_reset(void)
     ATTRIB_NORET;
 
 static void amiga_reset(void)
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index de39b1f343ea..3d913259e507 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -144,7 +144,7 @@ extern int ptrace_set_watch_regs(struct task_struct *child,
 extern asmlinkage void syscall_trace_enter(struct pt_regs *regs);
 extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
 
-extern NORET_TYPE void die(const char *, struct pt_regs *) ATTRIB_NORET;
+extern void die(const char *, struct pt_regs *) ATTRIB_NORET;
 
 static inline void die_if_kernel(const char *str, struct pt_regs *regs)
 {
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 5c8a49d55054..725e9a5ca966 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1340,7 +1340,7 @@ void ejtag_exception_handler(struct pt_regs *regs)
 /*
  * NMI exception handler.
  */
-NORET_TYPE void ATTRIB_NORET nmi_exception_handler(struct pt_regs *regs)
+void ATTRIB_NORET nmi_exception_handler(struct pt_regs *regs)
 {
 	bust_spinlocks(1);
 	printk("NMI taken!!!!\n");
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c
index e63f2e7d2efb..026e7f153949 100644
--- a/arch/powerpc/kernel/machine_kexec_32.c
+++ b/arch/powerpc/kernel/machine_kexec_32.c
@@ -16,7 +16,7 @@
 #include <asm/hw_irq.h>
 #include <asm/io.h>
 
-typedef NORET_TYPE void (*relocate_new_kernel_t)(
+typedef void (*relocate_new_kernel_t)(
 				unsigned long indirection_page,
 				unsigned long reboot_code_buffer,
 				unsigned long start_address) ATTRIB_NORET;
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 26ccbf77dd41..5fbbf814923a 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -307,9 +307,9 @@ static union thread_union kexec_stack __init_task_data =
 struct paca_struct kexec_paca;
 
 /* Our assembly helper, in kexec_stub.S */
-extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
-					void *image, void *control,
-					void (*clear_all)(void)) ATTRIB_NORET;
+extern void kexec_sequence(void *newstack, unsigned long start,
+			   void *image, void *control,
+			   void (*clear_all)(void)) ATTRIB_NORET;
 
 /* too late to fail here */
 void default_machine_kexec(struct kimage *image)
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index fab88431a06f..0fd2e863e114 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -30,7 +30,7 @@ struct mcck_struct {
 
 static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
 
-static NORET_TYPE void s390_handle_damage(char *msg)
+static void s390_handle_damage(char *msg)
 {
 	smp_send_stop();
 	disabled_wait((unsigned long) __builtin_return_address(0));
diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c
index e00d7179989e..b0c907059067 100644
--- a/arch/tile/kernel/machine_kexec.c
+++ b/arch/tile/kernel/machine_kexec.c
@@ -248,10 +248,10 @@ static void setup_quasi_va_is_pa(void)
 }
 
 
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
 {
 	void *reboot_code_buffer;
-	NORET_TYPE void (*rnk)(unsigned long, void *, unsigned long)
+	void (*rnk)(unsigned long, void *, unsigned long)
 		ATTRIB_NORET;
 
 	/* Mask all interrupts before starting to reboot. */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 60934395e36c..aaf1753dd2b3 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -185,16 +185,16 @@ static inline void might_fault(void)
 
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
-NORET_TYPE __printf(1, 2)
+__printf(1, 2)
 void panic(const char *fmt, ...)
 	ATTRIB_NORET __cold;
 extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
 extern int oops_may_print(void);
-NORET_TYPE void do_exit(long error_code)
+void do_exit(long error_code)
 	ATTRIB_NORET;
-NORET_TYPE void complete_and_exit(struct completion *, long)
+void complete_and_exit(struct completion *, long)
 	ATTRIB_NORET;
 
 /* Internal, do not use. */
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index c75074cb8ad4..6a8f252e49ee 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -88,7 +88,6 @@
 
 #endif
 
-#define NORET_TYPE    /**/
 #define ATTRIB_NORET  __attribute__((noreturn))
 
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 21cd0303af51..4032ec1cf836 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2275,7 +2275,7 @@ extern void __cleanup_sighand(struct sighand_struct *);
 extern void exit_itimers(struct signal_struct *);
 extern void flush_itimer_signals(void);
 
-extern NORET_TYPE void do_group_exit(int);
+extern void do_group_exit(int);
 
 extern void daemonize(const char *, ...);
 extern int allow_signal(int);
diff --git a/kernel/exit.c b/kernel/exit.c
index 94ed6e20bb53..c44738267be7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -887,7 +887,7 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
-NORET_TYPE void do_exit(long code)
+void do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
@@ -1051,7 +1051,7 @@ NORET_TYPE void do_exit(long code)
 
 EXPORT_SYMBOL_GPL(do_exit);
 
-NORET_TYPE void complete_and_exit(struct completion *comp, long code)
+void complete_and_exit(struct completion *comp, long code)
 {
 	if (comp)
 		complete(comp);
@@ -1070,7 +1070,7 @@ SYSCALL_DEFINE1(exit, int, error_code)
  * Take down every thread in the group.  This is called by fatal signals
  * as well as by sys_exit_group (below).
  */
-NORET_TYPE void
+void
 do_group_exit(int exit_code)
 {
 	struct signal_struct *sig = current->signal;
diff --git a/kernel/panic.c b/kernel/panic.c
index 3458469eb7c3..6fd09ed6fd90 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -57,7 +57,7 @@ EXPORT_SYMBOL(panic_blink);
  *
  *	This function never returns.
  */
-NORET_TYPE void panic(const char * fmt, ...)
+void panic(const char *fmt, ...)
 {
 	static char buf[1024];
 	va_list args;
-- 
cgit v1.2.3


From ff2d8b19a3a62559afba1c53360c8577a7697714 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 12 Jan 2012 17:17:21 -0800
Subject: treewide: convert uses of ATTRIB_NORET to __noreturn

Use the more commonly used __noreturn instead of ATTRIB_NORET.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Haavard Skinnemoen <hskinnemoen@gmail.com>
Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/machine_kexec.c       | 2 +-
 arch/m68k/amiga/config.c               | 3 +--
 arch/mips/include/asm/ptrace.h         | 2 +-
 arch/mips/kernel/traps.c               | 2 +-
 arch/mn10300/include/asm/exceptions.h  | 2 +-
 arch/powerpc/kernel/machine_kexec_32.c | 2 +-
 arch/powerpc/kernel/machine_kexec_64.c | 2 +-
 arch/s390/include/asm/processor.h      | 2 +-
 arch/sh/kernel/process_32.c            | 2 +-
 arch/sh/kernel/process_64.c            | 2 +-
 arch/tile/kernel/machine_kexec.c       | 2 +-
 include/linux/kernel.h                 | 6 +++---
 12 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 581a16d5e85b..4eed35814994 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -31,7 +31,7 @@ typedef void (*relocate_new_kernel_t)(
 					unsigned long indirection_page,
 					unsigned long start_address,
 					struct ia64_boot_param *boot_param,
-					unsigned long pal_addr) ATTRIB_NORET;
+					unsigned long pal_addr) __noreturn;
 
 struct kimage *ia64_kimage;
 
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index a3b0558328b6..b95a451b1c3a 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -511,8 +511,7 @@ static unsigned long amiga_gettimeoffset(void)
 	return ticks + offset;
 }
 
-static void amiga_reset(void)
-    ATTRIB_NORET;
+static void amiga_reset(void)  __noreturn;
 
 static void amiga_reset(void)
 {
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index 3d913259e507..7b99c670e478 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -144,7 +144,7 @@ extern int ptrace_set_watch_regs(struct task_struct *child,
 extern asmlinkage void syscall_trace_enter(struct pt_regs *regs);
 extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
 
-extern void die(const char *, struct pt_regs *) ATTRIB_NORET;
+extern void die(const char *, struct pt_regs *) __noreturn;
 
 static inline void die_if_kernel(const char *str, struct pt_regs *regs)
 {
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 725e9a5ca966..bbddb86c1fa1 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1340,7 +1340,7 @@ void ejtag_exception_handler(struct pt_regs *regs)
 /*
  * NMI exception handler.
  */
-void ATTRIB_NORET nmi_exception_handler(struct pt_regs *regs)
+void __noreturn nmi_exception_handler(struct pt_regs *regs)
 {
 	bust_spinlocks(1);
 	printk("NMI taken!!!!\n");
diff --git a/arch/mn10300/include/asm/exceptions.h b/arch/mn10300/include/asm/exceptions.h
index ca3e20508c77..95a4d42c3a06 100644
--- a/arch/mn10300/include/asm/exceptions.h
+++ b/arch/mn10300/include/asm/exceptions.h
@@ -110,7 +110,7 @@ extern asmlinkage void nmi_handler(void);
 extern asmlinkage void misalignment(struct pt_regs *, enum exception_code);
 
 extern void die(const char *, struct pt_regs *, enum exception_code)
-	ATTRIB_NORET;
+	__noreturn;
 
 extern int die_if_no_fixup(const char *, struct pt_regs *, enum exception_code);
 
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c
index 026e7f153949..affe5dcce7f4 100644
--- a/arch/powerpc/kernel/machine_kexec_32.c
+++ b/arch/powerpc/kernel/machine_kexec_32.c
@@ -19,7 +19,7 @@
 typedef void (*relocate_new_kernel_t)(
 				unsigned long indirection_page,
 				unsigned long reboot_code_buffer,
-				unsigned long start_address) ATTRIB_NORET;
+				unsigned long start_address) __noreturn;
 
 /*
  * This is a generic machine_kexec function suitable at least for
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 5fbbf814923a..d7f609086a99 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -309,7 +309,7 @@ struct paca_struct kexec_paca;
 /* Our assembly helper, in kexec_stub.S */
 extern void kexec_sequence(void *newstack, unsigned long start,
 			   void *image, void *control,
-			   void (*clear_all)(void)) ATTRIB_NORET;
+			   void (*clear_all)(void)) __noreturn;
 
 /* too late to fail here */
 void default_machine_kexec(struct kimage *image)
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 27272f6a14c2..d25843a6a915 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -236,7 +236,7 @@ static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
 /*
  * Function to drop a processor into disabled wait state
  */
-static inline void ATTRIB_NORET disabled_wait(unsigned long code)
+static inline void __noreturn disabled_wait(unsigned long code)
 {
         unsigned long ctl_buf;
         psw_t dw_psw;
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index aaf6d59c2012..7ec665178125 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -70,7 +70,7 @@ void show_regs(struct pt_regs * regs)
 /*
  * Create a kernel thread
  */
-ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *))
+__noreturn void kernel_thread_helper(void *arg, int (*fn)(void *))
 {
 	do_exit(fn(arg));
 }
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 210c1cabcb7f..cbd4e4bb9fc5 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -285,7 +285,7 @@ void show_regs(struct pt_regs *regs)
 /*
  * Create a kernel thread
  */
-ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *))
+__noreturn void kernel_thread_helper(void *arg, int (*fn)(void *))
 {
 	do_exit(fn(arg));
 }
diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c
index b0c907059067..6255f2eab112 100644
--- a/arch/tile/kernel/machine_kexec.c
+++ b/arch/tile/kernel/machine_kexec.c
@@ -252,7 +252,7 @@ void machine_kexec(struct kimage *image)
 {
 	void *reboot_code_buffer;
 	void (*rnk)(unsigned long, void *, unsigned long)
-		ATTRIB_NORET;
+		__noreturn;
 
 	/* Mask all interrupts before starting to reboot. */
 	interrupt_mask_set_mask(~0ULL);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index aaf1753dd2b3..e8343422240a 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -187,15 +187,15 @@ extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
 __printf(1, 2)
 void panic(const char *fmt, ...)
-	ATTRIB_NORET __cold;
+	__noreturn __cold;
 extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
 extern int oops_may_print(void);
 void do_exit(long error_code)
-	ATTRIB_NORET;
+	__noreturn;
 void complete_and_exit(struct completion *, long)
-	ATTRIB_NORET;
+	__noreturn;
 
 /* Internal, do not use. */
 int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
-- 
cgit v1.2.3


From 0d259cf8190b9c446eefd5225ffcc3941e76a432 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 12 Jan 2012 17:17:25 -0800
Subject: include/linux/linkage.h: remove unused ATTRIB_NORET macro

The uses have been renamed so delete the unused macro.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/linkage.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 6a8f252e49ee..807f1e533226 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -88,6 +88,4 @@
 
 #endif
 
-#define ATTRIB_NORET  __attribute__((noreturn))
-
 #endif
-- 
cgit v1.2.3


From 43570fd2f47ba518145e9289f54cde3dba4c8b25 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 12 Jan 2012 17:17:27 -0800
Subject: mm,slub,x86: decouple size of struct page from CONFIG_CMPXCHG_LOCAL

While implementing cmpxchg_double() on s390 I realized that we don't set
CONFIG_CMPXCHG_LOCAL despite the fact that we have support for it.

However setting that option will increase the size of struct page by
eight bytes on 64 bit, which we certainly do not want.  Also, it doesn't
make sense that a present cpu feature should increase the size of struct
page.

Besides that it looks like the dependency to CMPXCHG_LOCAL is wrong and
that it should depend on CMPXCHG_DOUBLE instead.

This patch:

If an architecture supports CMPXCHG_LOCAL this shouldn't result
automatically in larger struct pages if the SLUB allocator is used.
Instead introduce a new config option "HAVE_ALIGNED_STRUCT_PAGE" which
can be selected if a double word aligned struct page is required.  Also
update x86 Kconfig so that it should work as before.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig             | 8 ++++++++
 arch/x86/Kconfig         | 1 +
 include/linux/mm_types.h | 9 ++++-----
 mm/slub.c                | 6 +++---
 4 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 2505740b81d2..a2c5c077c32d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -185,4 +185,12 @@ config HAVE_RCU_TABLE_FREE
 config ARCH_HAVE_NMI_SAFE_CMPXCHG
 	bool
 
+config HAVE_ALIGNED_STRUCT_PAGE
+	bool
+	help
+	  This makes sure that struct pages are double word aligned and that
+	  e.g. the SLUB allocator can perform double word atomic operations
+	  on a struct page for better performance. However selecting this
+	  might increase the size of a struct page by a word.
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a150f4c35e94..5201a2c27239 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -60,6 +60,7 @@ config X86
 	select PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
 	select ANON_INODES
+	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
 	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5b42f1b34eb7..3cc3062b3767 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -151,12 +151,11 @@ struct page {
 #endif
 }
 /*
- * If another subsystem starts using the double word pairing for atomic
- * operations on struct page then it must change the #if to ensure
- * proper alignment of the page struct.
+ * The struct page can be forced to be double word aligned so that atomic ops
+ * on double words work. The SLUB allocator can make use of such a feature.
  */
-#if defined(CONFIG_SLUB) && defined(CONFIG_CMPXCHG_LOCAL)
-	__attribute__((__aligned__(2*sizeof(unsigned long))))
+#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
+	__aligned(2 * sizeof(unsigned long))
 #endif
 ;
 
diff --git a/mm/slub.c b/mm/slub.c
index 5d37b5e44140..72aa84134609 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -366,7 +366,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
 		const char *n)
 {
 	VM_BUG_ON(!irqs_disabled());
-#ifdef CONFIG_CMPXCHG_DOUBLE
+#if defined(CONFIG_CMPXCHG_DOUBLE) && defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
 	if (s->flags & __CMPXCHG_DOUBLE) {
 		if (cmpxchg_double(&page->freelist, &page->counters,
 			freelist_old, counters_old,
@@ -400,7 +400,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
 		void *freelist_new, unsigned long counters_new,
 		const char *n)
 {
-#ifdef CONFIG_CMPXCHG_DOUBLE
+#if defined(CONFIG_CMPXCHG_DOUBLE) && defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
 	if (s->flags & __CMPXCHG_DOUBLE) {
 		if (cmpxchg_double(&page->freelist, &page->counters,
 			freelist_old, counters_old,
@@ -3014,7 +3014,7 @@ static int kmem_cache_open(struct kmem_cache *s,
 		}
 	}
 
-#ifdef CONFIG_CMPXCHG_DOUBLE
+#if defined(CONFIG_CMPXCHG_DOUBLE) && defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
 	if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
 		/* Enable fast mode */
 		s->flags |= __CMPXCHG_DOUBLE;
-- 
cgit v1.2.3


From 28d82dc1c4edbc352129f97f4ca22624d1fe61de Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Thu, 12 Jan 2012 17:17:43 -0800
Subject: epoll: limit paths

The current epoll code can be tickled to run basically indefinitely in
both loop detection path check (on ep_insert()), and in the wakeup paths.
The programs that tickle this behavior set up deeply linked networks of
epoll file descriptors that cause the epoll algorithms to traverse them
indefinitely.  A couple of these sample programs have been previously
posted in this thread: https://lkml.org/lkml/2011/2/25/297.

To fix the loop detection path check algorithms, I simply keep track of
the epoll nodes that have been already visited.  Thus, the loop detection
becomes proportional to the number of epoll file descriptor and links.
This dramatically decreases the run-time of the loop check algorithm.  In
one diabolical case I tried it reduced the run-time from 15 minutes (all
in kernel time) to .3 seconds.

Fixing the wakeup paths could be done at wakeup time in a similar manner
by keeping track of nodes that have already been visited, but the
complexity is harder, since there can be multiple wakeups on different
cpus...Thus, I've opted to limit the number of possible wakeup paths when
the paths are created.

This is accomplished, by noting that the end file descriptor points that
are found during the loop detection pass (from the newly added link), are
actually the sources for wakeup events.  I keep a list of these file
descriptors and limit the number and length of these paths that emanate
from these 'source file descriptors'.  In the current implementation I
allow 1000 paths of length 1, 500 of length 2, 100 of length 3, 50 of
length 4 and 10 of length 5.  Note that it is sufficient to check the
'source file descriptors' reachable from the newly added link, since no
other 'source file descriptors' will have newly added links.  This allows
us to check only the wakeup paths that may have gotten too long, and not
re-check all possible wakeup paths on the system.

In terms of the path limit selection, I think it's first worth noting that
the most common case for epoll, is probably the model where you have 1
epoll file descriptor that is monitoring n number of 'source file
descriptors'.  In this case, each 'source file descriptor' has a 1 path of
length 1.  Thus, I believe that the limits I'm proposing are quite
reasonable and in fact may be too generous.  Thus, I'm hoping that the
proposed limits will not cause any workloads that currently work to
fail.

In terms of locking, I have extended the use of the 'epmutex' to all
epoll_ctl add and remove operations.  Currently it's only used in a subset
of the add paths.  I need to hold the epmutex, so that we can correctly
traverse a coherent graph, to check the number of paths.  I believe that
this additional locking is probably ok, since it's in the setup/teardown
paths, and doesn't affect the running paths, but it certainly is going to
add some extra overhead.  Also, worth noting is that the epmutex was
recently added to the ep_ctl add operations in the initial path loop
detection code using the argument that it was not on a critical path.

Another thing to note here, is the length of epoll chains that is allowed.
Currently, eventpoll.c defines:

/* Maximum number of nesting allowed inside epoll sets */
#define EP_MAX_NESTS 4

This basically means that I am limited to a graph depth of 5 (EP_MAX_NESTS
+ 1).  However, this limit is currently only enforced during the loop
check detection code, and only when the epoll file descriptors are added
in a certain order.  Thus, this limit is currently easily bypassed.  The
newly added check for wakeup paths, strictly limits the wakeup paths to a
length of 5, regardless of the order in which ep's are linked together.
Thus, a side-effect of the new code is a more consistent enforcement of
the graph depth.

Thus far, I've tested this, using the sample programs previously
mentioned, which now either return quickly or return -EINVAL.  I've also
tested using the piptest.c epoll tester, which showed no difference in
performance.  I've also created a number of different epoll networks and
tested that they behave as expected.

I believe this solves the original diabolical test cases, while still
preserving the sane epoll nesting.

Signed-off-by: Jason Baron <jbaron@redhat.com>
Cc: Nelson Elhage <nelhage@ksplice.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventpoll.c            | 234 +++++++++++++++++++++++++++++++++++++++++-----
 include/linux/eventpoll.h |   1 +
 include/linux/fs.h        |   1 +
 3 files changed, 211 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 828e750af23a..aabdfc38cf24 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -197,6 +197,12 @@ struct eventpoll {
 
 	/* The user that created the eventpoll descriptor */
 	struct user_struct *user;
+
+	struct file *file;
+
+	/* used to optimize loop detection check */
+	int visited;
+	struct list_head visited_list_link;
 };
 
 /* Wait structure used by the poll hooks */
@@ -255,6 +261,15 @@ static struct kmem_cache *epi_cache __read_mostly;
 /* Slab cache used to allocate "struct eppoll_entry" */
 static struct kmem_cache *pwq_cache __read_mostly;
 
+/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
+static LIST_HEAD(visited_list);
+
+/*
+ * List of files with newly added links, where we may need to limit the number
+ * of emanating paths. Protected by the epmutex.
+ */
+static LIST_HEAD(tfile_check_list);
+
 #ifdef CONFIG_SYSCTL
 
 #include <linux/sysctl.h>
@@ -276,6 +291,12 @@ ctl_table epoll_table[] = {
 };
 #endif /* CONFIG_SYSCTL */
 
+static const struct file_operations eventpoll_fops;
+
+static inline int is_file_epoll(struct file *f)
+{
+	return f->f_op == &eventpoll_fops;
+}
 
 /* Setup the structure that is used as key for the RB tree */
 static inline void ep_set_ffd(struct epoll_filefd *ffd,
@@ -711,12 +732,6 @@ static const struct file_operations eventpoll_fops = {
 	.llseek		= noop_llseek,
 };
 
-/* Fast test to see if the file is an eventpoll file */
-static inline int is_file_epoll(struct file *f)
-{
-	return f->f_op == &eventpoll_fops;
-}
-
 /*
  * This is called from eventpoll_release() to unlink files from the eventpoll
  * interface. We need to have this facility to cleanup correctly files that are
@@ -926,6 +941,99 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
 	rb_insert_color(&epi->rbn, &ep->rbr);
 }
 
+
+
+#define PATH_ARR_SIZE 5
+/*
+ * These are the number paths of length 1 to 5, that we are allowing to emanate
+ * from a single file of interest. For example, we allow 1000 paths of length
+ * 1, to emanate from each file of interest. This essentially represents the
+ * potential wakeup paths, which need to be limited in order to avoid massive
+ * uncontrolled wakeup storms. The common use case should be a single ep which
+ * is connected to n file sources. In this case each file source has 1 path
+ * of length 1. Thus, the numbers below should be more than sufficient. These
+ * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify
+ * and delete can't add additional paths. Protected by the epmutex.
+ */
+static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 };
+static int path_count[PATH_ARR_SIZE];
+
+static int path_count_inc(int nests)
+{
+	if (++path_count[nests] > path_limits[nests])
+		return -1;
+	return 0;
+}
+
+static void path_count_init(void)
+{
+	int i;
+
+	for (i = 0; i < PATH_ARR_SIZE; i++)
+		path_count[i] = 0;
+}
+
+static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
+{
+	int error = 0;
+	struct file *file = priv;
+	struct file *child_file;
+	struct epitem *epi;
+
+	list_for_each_entry(epi, &file->f_ep_links, fllink) {
+		child_file = epi->ep->file;
+		if (is_file_epoll(child_file)) {
+			if (list_empty(&child_file->f_ep_links)) {
+				if (path_count_inc(call_nests)) {
+					error = -1;
+					break;
+				}
+			} else {
+				error = ep_call_nested(&poll_loop_ncalls,
+							EP_MAX_NESTS,
+							reverse_path_check_proc,
+							child_file, child_file,
+							current);
+			}
+			if (error != 0)
+				break;
+		} else {
+			printk(KERN_ERR "reverse_path_check_proc: "
+				"file is not an ep!\n");
+		}
+	}
+	return error;
+}
+
+/**
+ * reverse_path_check - The tfile_check_list is list of file *, which have
+ *                      links that are proposed to be newly added. We need to
+ *                      make sure that those added links don't add too many
+ *                      paths such that we will spend all our time waking up
+ *                      eventpoll objects.
+ *
+ * Returns: Returns zero if the proposed links don't create too many paths,
+ *	    -1 otherwise.
+ */
+static int reverse_path_check(void)
+{
+	int length = 0;
+	int error = 0;
+	struct file *current_file;
+
+	/* let's call this for all tfiles */
+	list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
+		length++;
+		path_count_init();
+		error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+					reverse_path_check_proc, current_file,
+					current_file, current);
+		if (error)
+			break;
+	}
+	return error;
+}
+
 /*
  * Must be called with "mtx" held.
  */
@@ -987,6 +1095,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 	 */
 	ep_rbtree_insert(ep, epi);
 
+	/* now check if we've created too many backpaths */
+	error = -EINVAL;
+	if (reverse_path_check())
+		goto error_remove_epi;
+
 	/* We have to drop the new item inside our item list to keep track of it */
 	spin_lock_irqsave(&ep->lock, flags);
 
@@ -1011,6 +1124,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 
 	return 0;
 
+error_remove_epi:
+	spin_lock(&tfile->f_lock);
+	if (ep_is_linked(&epi->fllink))
+		list_del_init(&epi->fllink);
+	spin_unlock(&tfile->f_lock);
+
+	rb_erase(&epi->rbn, &ep->rbr);
+
 error_unregister:
 	ep_unregister_pollwait(ep, epi);
 
@@ -1275,18 +1396,36 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
 	int error = 0;
 	struct file *file = priv;
 	struct eventpoll *ep = file->private_data;
+	struct eventpoll *ep_tovisit;
 	struct rb_node *rbp;
 	struct epitem *epi;
 
 	mutex_lock_nested(&ep->mtx, call_nests + 1);
+	ep->visited = 1;
+	list_add(&ep->visited_list_link, &visited_list);
 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		epi = rb_entry(rbp, struct epitem, rbn);
 		if (unlikely(is_file_epoll(epi->ffd.file))) {
+			ep_tovisit = epi->ffd.file->private_data;
+			if (ep_tovisit->visited)
+				continue;
 			error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
-					       ep_loop_check_proc, epi->ffd.file,
-					       epi->ffd.file->private_data, current);
+					ep_loop_check_proc, epi->ffd.file,
+					ep_tovisit, current);
 			if (error != 0)
 				break;
+		} else {
+			/*
+			 * If we've reached a file that is not associated with
+			 * an ep, then we need to check if the newly added
+			 * links are going to add too many wakeup paths. We do
+			 * this by adding it to the tfile_check_list, if it's
+			 * not already there, and calling reverse_path_check()
+			 * during ep_insert().
+			 */
+			if (list_empty(&epi->ffd.file->f_tfile_llink))
+				list_add(&epi->ffd.file->f_tfile_llink,
+					 &tfile_check_list);
 		}
 	}
 	mutex_unlock(&ep->mtx);
@@ -1307,8 +1446,31 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
  */
 static int ep_loop_check(struct eventpoll *ep, struct file *file)
 {
-	return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+	int ret;
+	struct eventpoll *ep_cur, *ep_next;
+
+	ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
 			      ep_loop_check_proc, file, ep, current);
+	/* clear visited list */
+	list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
+							visited_list_link) {
+		ep_cur->visited = 0;
+		list_del(&ep_cur->visited_list_link);
+	}
+	return ret;
+}
+
+static void clear_tfile_check_list(void)
+{
+	struct file *file;
+
+	/* first clear the tfile_check_list */
+	while (!list_empty(&tfile_check_list)) {
+		file = list_first_entry(&tfile_check_list, struct file,
+					f_tfile_llink);
+		list_del_init(&file->f_tfile_llink);
+	}
+	INIT_LIST_HEAD(&tfile_check_list);
 }
 
 /*
@@ -1316,8 +1478,9 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file)
  */
 SYSCALL_DEFINE1(epoll_create1, int, flags)
 {
-	int error;
+	int error, fd;
 	struct eventpoll *ep = NULL;
+	struct file *file;
 
 	/* Check the EPOLL_* constant for consistency.  */
 	BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
@@ -1334,11 +1497,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
 	 * Creates all the items needed to setup an eventpoll file. That is,
 	 * a file structure and a free file descriptor.
 	 */
-	error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
+	fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
+	if (fd < 0) {
+		error = fd;
+		goto out_free_ep;
+	}
+	file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
 				 O_RDWR | (flags & O_CLOEXEC));
-	if (error < 0)
-		ep_free(ep);
-
+	if (IS_ERR(file)) {
+		error = PTR_ERR(file);
+		goto out_free_fd;
+	}
+	fd_install(fd, file);
+	ep->file = file;
+	return fd;
+
+out_free_fd:
+	put_unused_fd(fd);
+out_free_ep:
+	ep_free(ep);
 	return error;
 }
 
@@ -1404,21 +1581,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	/*
 	 * When we insert an epoll file descriptor, inside another epoll file
 	 * descriptor, there is the change of creating closed loops, which are
-	 * better be handled here, than in more critical paths.
+	 * better be handled here, than in more critical paths. While we are
+	 * checking for loops we also determine the list of files reachable
+	 * and hang them on the tfile_check_list, so we can check that we
+	 * haven't created too many possible wakeup paths.
 	 *
-	 * We hold epmutex across the loop check and the insert in this case, in
-	 * order to prevent two separate inserts from racing and each doing the
-	 * insert "at the same time" such that ep_loop_check passes on both
-	 * before either one does the insert, thereby creating a cycle.
+	 * We need to hold the epmutex across both ep_insert and ep_remove
+	 * b/c we want to make sure we are looking at a coherent view of
+	 * epoll network.
 	 */
-	if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+	if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
 		mutex_lock(&epmutex);
 		did_lock_epmutex = 1;
-		error = -ELOOP;
-		if (ep_loop_check(ep, tfile) != 0)
-			goto error_tgt_fput;
 	}
-
+	if (op == EPOLL_CTL_ADD) {
+		if (is_file_epoll(tfile)) {
+			error = -ELOOP;
+			if (ep_loop_check(ep, tfile) != 0)
+				goto error_tgt_fput;
+		} else
+			list_add(&tfile->f_tfile_llink, &tfile_check_list);
+	}
 
 	mutex_lock_nested(&ep->mtx, 0);
 
@@ -1437,6 +1620,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 			error = ep_insert(ep, &epds, tfile, fd);
 		} else
 			error = -EEXIST;
+		clear_tfile_check_list();
 		break;
 	case EPOLL_CTL_DEL:
 		if (epi)
@@ -1455,7 +1639,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	mutex_unlock(&ep->mtx);
 
 error_tgt_fput:
-	if (unlikely(did_lock_epmutex))
+	if (did_lock_epmutex)
 		mutex_unlock(&epmutex);
 
 	fput(tfile);
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index f362733186a5..657ab55beda0 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -61,6 +61,7 @@ struct file;
 static inline void eventpoll_init_file(struct file *file)
 {
 	INIT_LIST_HEAD(&file->f_ep_links);
+	INIT_LIST_HEAD(&file->f_tfile_llink);
 }
 
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7aacf31418fe..a7409bc157e0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1001,6 +1001,7 @@ struct file {
 #ifdef CONFIG_EPOLL
 	/* Used by fs/eventpoll.c to link all the hooks to this file */
 	struct list_head	f_ep_links;
+	struct list_head	f_tfile_llink;
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
 #ifdef CONFIG_DEBUG_WRITECOUNT
-- 
cgit v1.2.3


From ab936cbcd02072a34b60d268f94440fd5cf1970b Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 12 Jan 2012 17:17:44 -0800
Subject: memcg: add mem_cgroup_replace_page_cache() to fix LRU issue

Commit ef6a3c6311 ("mm: add replace_page_cache_page() function") added a
function replace_page_cache_page().  This function replaces a page in the
radix-tree with a new page.  When doing this, memory cgroup needs to fix
up the accounting information.  memcg needs to check the PCG_USED bit etc.

In some (many?) cases, 'newpage' is on LRU before calling
replace_page_cache_page().  So, memcg's LRU accounting information should
be fixed, too.

This patch adds mem_cgroup_replace_page_cache() and removes the old hooks.
In that function, old pages will be unaccounted without touching
res_counter and new page will be accounted to the memcg (of old page).
When overwriting pc->mem_cgroup of newpage, take zone->lru_lock and avoid
races with LRU handling.

Background:
  replace_page_cache_page() is called by FUSE code in its splice() handling.
  Here, 'newpage' is replacing oldpage but this newpage is not a newly allocated
  page and may be on LRU. LRU mis-accounting will be critical for memory cgroup
  because rmdir() checks the whole LRU is empty and there is no account leak.
  If a page is on the other LRU than it should be, rmdir() will fail.

This bug was added in March 2011, but no bug report yet.  I guess there
are not many people who use memcg and FUSE at the same time with upstream
kernels.

The result of this bug is that admin cannot destroy a memcg because of
account leak.  So, no panic, no deadlock.  And, even if an active cgroup
exists, umount can succeed.  So no problem at shutdown.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  6 ++++++
 mm/filemap.c               | 18 ++----------------
 mm/memcontrol.c            | 44 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f944591765eb..3558a5e268cf 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -122,6 +122,8 @@ struct zone_reclaim_stat*
 mem_cgroup_get_reclaim_stat_from_page(struct page *page);
 extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 					struct task_struct *p);
+extern void mem_cgroup_replace_page_cache(struct page *oldpage,
+					struct page *newpage);
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 extern int do_swap_account;
@@ -369,6 +371,10 @@ static inline
 void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
 {
 }
+static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
+				struct page *newpage)
+{
+}
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)
diff --git a/mm/filemap.c b/mm/filemap.c
index c4ee2e918bea..97f49ed35bd2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -393,24 +393,11 @@ EXPORT_SYMBOL(filemap_write_and_wait_range);
 int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 {
 	int error;
-	struct mem_cgroup *memcg = NULL;
 
 	VM_BUG_ON(!PageLocked(old));
 	VM_BUG_ON(!PageLocked(new));
 	VM_BUG_ON(new->mapping);
 
-	/*
-	 * This is not page migration, but prepare_migration and
-	 * end_migration does enough work for charge replacement.
-	 *
-	 * In the longer term we probably want a specialized function
-	 * for moving the charge from old to new in a more efficient
-	 * manner.
-	 */
-	error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask);
-	if (error)
-		return error;
-
 	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (!error) {
 		struct address_space *mapping = old->mapping;
@@ -432,13 +419,12 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 		if (PageSwapBacked(new))
 			__inc_zone_page_state(new, NR_SHMEM);
 		spin_unlock_irq(&mapping->tree_lock);
+		/* mem_cgroup codes must not be called under tree_lock */
+		mem_cgroup_replace_page_cache(old, new);
 		radix_tree_preload_end();
 		if (freepage)
 			freepage(old);
 		page_cache_release(old);
-		mem_cgroup_end_migration(memcg, old, new, true);
-	} else {
-		mem_cgroup_end_migration(memcg, old, new, false);
 	}
 
 	return error;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d87aa3510c5e..0b2d4036f1cd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3432,6 +3432,50 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	cgroup_release_and_wakeup_rmdir(&memcg->css);
 }
 
+/*
+ * At replace page cache, newpage is not under any memcg but it's on
+ * LRU. So, this function doesn't touch res_counter but handles LRU
+ * in correct way. Both pages are locked so we cannot race with uncharge.
+ */
+void mem_cgroup_replace_page_cache(struct page *oldpage,
+				  struct page *newpage)
+{
+	struct mem_cgroup *memcg;
+	struct page_cgroup *pc;
+	struct zone *zone;
+	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	unsigned long flags;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	pc = lookup_page_cgroup(oldpage);
+	/* fix accounting on old pages */
+	lock_page_cgroup(pc);
+	memcg = pc->mem_cgroup;
+	mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -1);
+	ClearPageCgroupUsed(pc);
+	unlock_page_cgroup(pc);
+
+	if (PageSwapBacked(oldpage))
+		type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+
+	zone = page_zone(newpage);
+	pc = lookup_page_cgroup(newpage);
+	/*
+	 * Even if newpage->mapping was NULL before starting replacement,
+	 * the newpage may be on LRU(or pagevec for LRU) already. We lock
+	 * LRU while we overwrite pc->mem_cgroup.
+	 */
+	spin_lock_irqsave(&zone->lru_lock, flags);
+	if (PageLRU(newpage))
+		del_page_from_lru_list(zone, newpage, page_lru(newpage));
+	__mem_cgroup_commit_charge(memcg, newpage, 1, pc, type);
+	if (PageLRU(newpage))
+		add_page_to_lru_list(zone, newpage, page_lru(newpage));
+	spin_unlock_irqrestore(&zone->lru_lock, flags);
+}
+
 #ifdef CONFIG_DEBUG_VM
 static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
 {
-- 
cgit v1.2.3


From 5660048ccac8735d9bc0a46325a02e6a6518b5b2 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Thu, 12 Jan 2012 17:17:59 -0800
Subject: mm: move memcg hierarchy reclaim to generic reclaim code

Memory cgroup limit reclaim and traditional global pressure reclaim will
soon share the same code to reclaim from a hierarchical tree of memory
cgroups.

In preparation of this, move the two right next to each other in
shrink_zone().

The mem_cgroup_hierarchical_reclaim() polymath is split into a soft
limit reclaim function, which still does hierarchy walking on its own,
and a limit (shrinking) reclaim function, which relies on generic
reclaim code to walk the hierarchy.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ying Han <yinghan@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  24 +++++++
 mm/memcontrol.c            | 169 +++++++++++++++++++++++----------------------
 mm/vmscan.c                |  43 ++++++++++--
 3 files changed, 148 insertions(+), 88 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3558a5e268cf..3b99dce85293 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -40,6 +40,12 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct mem_cgroup *mem_cont,
 					int active, int file);
 
+struct mem_cgroup_reclaim_cookie {
+	struct zone *zone;
+	int priority;
+	unsigned int generation;
+};
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 /*
  * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -106,6 +112,11 @@ mem_cgroup_prepare_migration(struct page *page,
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	struct page *oldpage, struct page *newpage, bool migration_ok);
 
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
+				   struct mem_cgroup *,
+				   struct mem_cgroup_reclaim_cookie *);
+void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
+
 /*
  * For memory reclaim.
  */
@@ -281,6 +292,19 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 {
 }
 
+static inline struct mem_cgroup *
+mem_cgroup_iter(struct mem_cgroup *root,
+		struct mem_cgroup *prev,
+		struct mem_cgroup_reclaim_cookie *reclaim)
+{
+	return NULL;
+}
+
+static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
+					 struct mem_cgroup *prev)
+{
+}
+
 static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *memcg)
 {
 	return 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bec451da7def..750ed1449955 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -370,8 +370,6 @@ enum charge_type {
 #define MEM_CGROUP_RECLAIM_NOSWAP	(1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
 #define MEM_CGROUP_RECLAIM_SHRINK_BIT	0x1
 #define MEM_CGROUP_RECLAIM_SHRINK	(1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
-#define MEM_CGROUP_RECLAIM_SOFT_BIT	0x2
-#define MEM_CGROUP_RECLAIM_SOFT		(1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
 
 static void mem_cgroup_get(struct mem_cgroup *memcg);
 static void mem_cgroup_put(struct mem_cgroup *memcg);
@@ -857,20 +855,33 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return memcg;
 }
 
-struct mem_cgroup_reclaim_cookie {
-	struct zone *zone;
-	int priority;
-	unsigned int generation;
-};
-
-static struct mem_cgroup *
-mem_cgroup_iter(struct mem_cgroup *root,
-		struct mem_cgroup *prev,
-		struct mem_cgroup_reclaim_cookie *reclaim)
+/**
+ * mem_cgroup_iter - iterate over memory cgroup hierarchy
+ * @root: hierarchy root
+ * @prev: previously returned memcg, NULL on first invocation
+ * @reclaim: cookie for shared reclaim walks, NULL for full walks
+ *
+ * Returns references to children of the hierarchy below @root, or
+ * @root itself, or %NULL after a full round-trip.
+ *
+ * Caller must pass the return value in @prev on subsequent
+ * invocations for reference counting, or use mem_cgroup_iter_break()
+ * to cancel a hierarchy walk before the round-trip is complete.
+ *
+ * Reclaimers can specify a zone and a priority level in @reclaim to
+ * divide up the memcgs in the hierarchy among all concurrent
+ * reclaimers operating on the same zone and priority.
+ */
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim)
 {
 	struct mem_cgroup *memcg = NULL;
 	int id = 0;
 
+	if (mem_cgroup_disabled())
+		return NULL;
+
 	if (!root)
 		root = root_mem_cgroup;
 
@@ -926,8 +937,13 @@ mem_cgroup_iter(struct mem_cgroup *root,
 	return memcg;
 }
 
-static void mem_cgroup_iter_break(struct mem_cgroup *root,
-				  struct mem_cgroup *prev)
+/**
+ * mem_cgroup_iter_break - abort a hierarchy walk prematurely
+ * @root: hierarchy root
+ * @prev: last visited hierarchy member as returned by mem_cgroup_iter()
+ */
+void mem_cgroup_iter_break(struct mem_cgroup *root,
+			   struct mem_cgroup *prev)
 {
 	if (!root)
 		root = root_mem_cgroup;
@@ -1555,6 +1571,42 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 	return min(limit, memsw);
 }
 
+static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
+					gfp_t gfp_mask,
+					unsigned long flags)
+{
+	unsigned long total = 0;
+	bool noswap = false;
+	int loop;
+
+	if (flags & MEM_CGROUP_RECLAIM_NOSWAP)
+		noswap = true;
+	if (!(flags & MEM_CGROUP_RECLAIM_SHRINK) && memcg->memsw_is_minimum)
+		noswap = true;
+
+	for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) {
+		if (loop)
+			drain_all_stock_async(memcg);
+		total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
+		/*
+		 * Allow limit shrinkers, which are triggered directly
+		 * by userspace, to catch signals and stop reclaim
+		 * after minimal progress, regardless of the margin.
+		 */
+		if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
+			break;
+		if (mem_cgroup_margin(memcg))
+			break;
+		/*
+		 * If nothing was reclaimed after two attempts, there
+		 * may be no reclaimable pages in this hierarchy.
+		 */
+		if (loop && !total)
+			break;
+	}
+	return total;
+}
+
 /**
  * test_mem_cgroup_node_reclaimable
  * @mem: the target memcg
@@ -1692,30 +1744,14 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
 }
 #endif
 
-/*
- * Scan the hierarchy if needed to reclaim memory. We remember the last child
- * we reclaimed from, so that we don't end up penalizing one child extensively
- * based on its position in the children list.
- *
- * root_memcg is the original ancestor that we've been reclaim from.
- *
- * We give up and return to the caller when we visit root_memcg twice.
- * (other groups can be removed while we're walking....)
- *
- * If shrink==true, for avoiding to free too much, this returns immedieately.
- */
-static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
-						struct zone *zone,
-						gfp_t gfp_mask,
-						unsigned long reclaim_options,
-						unsigned long *total_scanned)
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+				   struct zone *zone,
+				   gfp_t gfp_mask,
+				   unsigned long *total_scanned)
 {
 	struct mem_cgroup *victim = NULL;
-	int ret, total = 0;
+	int total = 0;
 	int loop = 0;
-	bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
-	bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
-	bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
 	unsigned long excess;
 	unsigned long nr_scanned;
 	struct mem_cgroup_reclaim_cookie reclaim = {
@@ -1725,29 +1761,17 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 
 	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
 
-	/* If memsw_is_minimum==1, swap-out is of-no-use. */
-	if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
-		noswap = true;
-
 	while (1) {
 		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
 		if (!victim) {
 			loop++;
-			/*
-			 * We are not draining per cpu cached charges during
-			 * soft limit reclaim  because global reclaim doesn't
-			 * care about charges. It tries to free some memory and
-			 * charges will not give any.
-			 */
-			if (!check_soft && loop >= 1)
-				drain_all_stock_async(root_memcg);
 			if (loop >= 2) {
 				/*
 				 * If we have not been able to reclaim
 				 * anything, it might because there are
 				 * no reclaimable pages under this hierarchy
 				 */
-				if (!check_soft || !total)
+				if (!total)
 					break;
 				/*
 				 * We want to do more targeted reclaim.
@@ -1761,30 +1785,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 			}
 			continue;
 		}
-		if (!mem_cgroup_reclaimable(victim, noswap)) {
-			/* this cgroup's local usage == 0 */
+		if (!mem_cgroup_reclaimable(victim, false))
 			continue;
-		}
-		/* we use swappiness of local cgroup */
-		if (check_soft) {
-			ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
-				noswap, zone, &nr_scanned);
-			*total_scanned += nr_scanned;
-		} else
-			ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
-						noswap);
-		total += ret;
-		/*
-		 * At shrinking usage, we can't check we should stop here or
-		 * reclaim more. It's depends on callers. last_scanned_child
-		 * will work enough for keeping fairness under tree.
-		 */
-		if (shrink)
-			break;
-		if (check_soft) {
-			if (!res_counter_soft_limit_excess(&root_memcg->res))
-				break;
-		} else if (mem_cgroup_margin(root_memcg))
+		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+						     zone, &nr_scanned);
+		*total_scanned += nr_scanned;
+		if (!res_counter_soft_limit_excess(&root_memcg->res))
 			break;
 	}
 	mem_cgroup_iter_break(root_memcg, victim);
@@ -2281,8 +2287,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	if (!(gfp_mask & __GFP_WAIT))
 		return CHARGE_WOULDBLOCK;
 
-	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
-					      gfp_mask, flags, NULL);
+	ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
 		return CHARGE_RETRY;
 	/*
@@ -3559,9 +3564,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 		/* Usage is reduced ? */
   		if (curusage >= oldusage)
@@ -3619,10 +3623,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_NOSWAP |
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_NOSWAP |
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3665,10 +3668,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 			break;
 
 		nr_scanned = 0;
-		reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
-						gfp_mask,
-						MEM_CGROUP_RECLAIM_SOFT,
-						&nr_scanned);
+		reclaimed = mem_cgroup_soft_reclaim(mz->mem, zone,
+						    gfp_mask, &nr_scanned);
 		nr_reclaimed += reclaimed;
 		*total_scanned += nr_scanned;
 		spin_lock(&mctz->lock);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e0627d07c3ac..136c7eb0ad88 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2104,12 +2104,43 @@ restart:
 static void shrink_zone(int priority, struct zone *zone,
 			struct scan_control *sc)
 {
-	struct mem_cgroup_zone mz = {
-		.mem_cgroup = sc->target_mem_cgroup,
+	struct mem_cgroup *root = sc->target_mem_cgroup;
+	struct mem_cgroup_reclaim_cookie reclaim = {
 		.zone = zone,
+		.priority = priority,
 	};
+	struct mem_cgroup *memcg;
+
+	if (global_reclaim(sc)) {
+		struct mem_cgroup_zone mz = {
+			.mem_cgroup = NULL,
+			.zone = zone,
+		};
+
+		shrink_mem_cgroup_zone(priority, &mz, sc);
+		return;
+	}
+
+	memcg = mem_cgroup_iter(root, NULL, &reclaim);
+	do {
+		struct mem_cgroup_zone mz = {
+			.mem_cgroup = memcg,
+			.zone = zone,
+		};
 
-	shrink_mem_cgroup_zone(priority, &mz, sc);
+		shrink_mem_cgroup_zone(priority, &mz, sc);
+		/*
+		 * Limit reclaim has historically picked one memcg and
+		 * scanned it with decreasing priority levels until
+		 * nr_to_reclaim had been reclaimed.  This priority
+		 * cycle is thus over after a single memcg.
+		 */
+		if (!global_reclaim(sc)) {
+			mem_cgroup_iter_break(root, memcg);
+			break;
+		}
+		memcg = mem_cgroup_iter(root, memcg, &reclaim);
+	} while (memcg);
 }
 
 /*
@@ -2374,6 +2405,10 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 		.order = 0,
 		.target_mem_cgroup = mem,
 	};
+	struct mem_cgroup_zone mz = {
+		.mem_cgroup = mem,
+		.zone = zone,
+	};
 
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2389,7 +2424,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 	 * will pick up pages from other mem cgroup's as well. We hack
 	 * the priority and make it zero.
 	 */
-	shrink_zone(0, zone, &sc);
+	shrink_mem_cgroup_zone(0, &mz, &sc);
 
 	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
-- 
cgit v1.2.3


From 6290df545814990ca2663baf6e894669132d5f73 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Thu, 12 Jan 2012 17:18:10 -0800
Subject: mm: collect LRU list heads into struct lruvec

Having a unified structure with a LRU list set for both global zones and
per-memcg zones allows to keep that code simple which deals with LRU
lists and does not care about the container itself.

Once the per-memcg LRU lists directly link struct pages, the isolation
function and all other list manipulations are shared between the memcg
case and the global LRU case.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ying Han <yinghan@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_inline.h |  2 +-
 include/linux/mmzone.h    | 10 ++++++----
 mm/memcontrol.c           | 17 +++++++----------
 mm/page_alloc.c           |  2 +-
 mm/swap.c                 | 11 +++++------
 mm/vmscan.c               | 10 +++++-----
 6 files changed, 25 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 8f7d24712dc1..e6a7ffe16d31 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -33,7 +33,7 @@ __add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l,
 static inline void
 add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
-	__add_page_to_lru_list(zone, page, l, &zone->lru[l].list);
+	__add_page_to_lru_list(zone, page, l, &zone->lruvec.lists[l]);
 }
 
 static inline void
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ca6ca92418a6..42e544cd4c9f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -159,6 +159,10 @@ static inline int is_unevictable_lru(enum lru_list l)
 	return (l == LRU_UNEVICTABLE);
 }
 
+struct lruvec {
+	struct list_head lists[NR_LRU_LISTS];
+};
+
 /* Mask used at gathering information at once (see memcontrol.c) */
 #define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))
 #define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
@@ -364,10 +368,8 @@ struct zone {
 	ZONE_PADDING(_pad1_)
 
 	/* Fields commonly accessed by the page reclaim scanner */
-	spinlock_t		lru_lock;	
-	struct zone_lru {
-		struct list_head list;
-	} lru[NR_LRU_LISTS];
+	spinlock_t		lru_lock;
+	struct lruvec		lruvec;
 
 	struct zone_reclaim_stat reclaim_stat;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ad7f36f676ff..6e7f849a1a9e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -134,10 +134,7 @@ struct mem_cgroup_reclaim_iter {
  * per-zone information in memory controller.
  */
 struct mem_cgroup_per_zone {
-	/*
-	 * spin_lock to protect the per cgroup LRU
-	 */
-	struct list_head	lists[NR_LRU_LISTS];
+	struct lruvec		lruvec;
 	unsigned long		count[NR_LRU_LISTS];
 
 	struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
@@ -1061,7 +1058,7 @@ void mem_cgroup_rotate_reclaimable_page(struct page *page)
 	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
 	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	list_move_tail(&pc->lru, &mz->lists[lru]);
+	list_move_tail(&pc->lru, &mz->lruvec.lists[lru]);
 }
 
 void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
@@ -1079,7 +1076,7 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
 	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
 	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	list_move(&pc->lru, &mz->lists[lru]);
+	list_move(&pc->lru, &mz->lruvec.lists[lru]);
 }
 
 void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
@@ -1109,7 +1106,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 	/* huge page split is done under lru_lock. so, we have no races. */
 	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
 	SetPageCgroupAcctLRU(pc);
-	list_add(&pc->lru, &mz->lists[lru]);
+	list_add(&pc->lru, &mz->lruvec.lists[lru]);
 }
 
 /*
@@ -1307,7 +1304,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 
 	BUG_ON(!mem_cont);
 	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
-	src = &mz->lists[lru];
+	src = &mz->lruvec.lists[lru];
 
 	scan = 0;
 	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
@@ -3738,7 +3735,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 
 	zone = &NODE_DATA(node)->node_zones[zid];
 	mz = mem_cgroup_zoneinfo(memcg, node, zid);
-	list = &mz->lists[lru];
+	list = &mz->lruvec.lists[lru];
 
 	loop = MEM_CGROUP_ZSTAT(mz, lru);
 	/* give some margin against EBUSY etc...*/
@@ -4864,7 +4861,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
 		mz = &pn->zoneinfo[zone];
 		for_each_lru(l)
-			INIT_LIST_HEAD(&mz->lists[l]);
+			INIT_LIST_HEAD(&mz->lruvec.lists[l]);
 		mz->usage_in_excess = 0;
 		mz->on_tree = false;
 		mz->mem = memcg;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 794e6715c226..25c248eb7d5f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4288,7 +4288,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 
 		zone_pcp_init(zone);
 		for_each_lru(l)
-			INIT_LIST_HEAD(&zone->lru[l].list);
+			INIT_LIST_HEAD(&zone->lruvec.lists[l]);
 		zone->reclaim_stat.recent_rotated[0] = 0;
 		zone->reclaim_stat.recent_rotated[1] = 0;
 		zone->reclaim_stat.recent_scanned[0] = 0;
diff --git a/mm/swap.c b/mm/swap.c
index 67a09a633a09..76ef79d3857c 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -236,7 +236,7 @@ static void pagevec_move_tail_fn(struct page *page, void *arg)
 
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		enum lru_list lru = page_lru_base_type(page);
-		list_move_tail(&page->lru, &zone->lru[lru].list);
+		list_move_tail(&page->lru, &zone->lruvec.lists[lru]);
 		mem_cgroup_rotate_reclaimable_page(page);
 		(*pgmoved)++;
 	}
@@ -480,7 +480,7 @@ static void lru_deactivate_fn(struct page *page, void *arg)
 		 * The page's writeback ends up during pagevec
 		 * We moves tha page into tail of inactive.
 		 */
-		list_move_tail(&page->lru, &zone->lru[lru].list);
+		list_move_tail(&page->lru, &zone->lruvec.lists[lru]);
 		mem_cgroup_rotate_reclaimable_page(page);
 		__count_vm_event(PGROTATED);
 	}
@@ -654,7 +654,6 @@ void lru_add_page_tail(struct zone* zone,
 	int active;
 	enum lru_list lru;
 	const int file = 0;
-	struct list_head *head;
 
 	VM_BUG_ON(!PageHead(page));
 	VM_BUG_ON(PageCompound(page_tail));
@@ -674,10 +673,10 @@ void lru_add_page_tail(struct zone* zone,
 		}
 		update_page_reclaim_stat(zone, page_tail, file, active);
 		if (likely(PageLRU(page)))
-			head = page->lru.prev;
+			__add_page_to_lru_list(zone, page_tail, lru,
+					       page->lru.prev);
 		else
-			head = &zone->lru[lru].list;
-		__add_page_to_lru_list(zone, page_tail, lru, head);
+			add_page_to_lru_list(zone, page_tail, lru);
 	} else {
 		SetPageUnevictable(page_tail);
 		add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 024168cfdcb0..93cdc44a1693 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1250,8 +1250,8 @@ static unsigned long isolate_pages_global(unsigned long nr,
 		lru += LRU_ACTIVE;
 	if (file)
 		lru += LRU_FILE;
-	return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
-								mode, file);
+	return isolate_lru_pages(nr, &z->lruvec.lists[lru], dst,
+				 scanned, order, mode, file);
 }
 
 /*
@@ -1630,7 +1630,7 @@ static void move_active_pages_to_lru(struct zone *zone,
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
 
-		list_move(&page->lru, &zone->lru[lru].list);
+		list_move(&page->lru, &zone->lruvec.lists[lru]);
 		mem_cgroup_add_lru_list(page, lru);
 		pgmoved += hpage_nr_pages(page);
 
@@ -3448,7 +3448,7 @@ retry:
 		enum lru_list l = page_lru_base_type(page);
 
 		__dec_zone_state(zone, NR_UNEVICTABLE);
-		list_move(&page->lru, &zone->lru[l].list);
+		list_move(&page->lru, &zone->lruvec.lists[l]);
 		mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
 		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
 		__count_vm_event(UNEVICTABLE_PGRESCUED);
@@ -3457,7 +3457,7 @@ retry:
 		 * rotate unevictable list
 		 */
 		SetPageUnevictable(page);
-		list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
+		list_move(&page->lru, &zone->lruvec.lists[LRU_UNEVICTABLE]);
 		mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
 		if (page_evictable(page, NULL))
 			goto retry;
-- 
cgit v1.2.3


From 925b7673cce39116ce61e7a06683a4a0dad1e72a Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Thu, 12 Jan 2012 17:18:15 -0800
Subject: mm: make per-memcg LRU lists exclusive

Now that all code that operated on global per-zone LRU lists is
converted to operate on per-memory cgroup LRU lists instead, there is no
reason to keep the double-LRU scheme around any longer.

The pc->lru member is removed and page->lru is linked directly to the
per-memory cgroup LRU lists, which removes two pointers from a
descriptor that exists for every page frame in the system.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Ying Han <yinghan@google.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h  |  51 ++++----
 include/linux/mm_inline.h   |  21 ++-
 include/linux/page_cgroup.h |   1 -
 mm/memcontrol.c             | 311 +++++++++++++++++++++-----------------------
 mm/page_cgroup.c            |   1 -
 mm/swap.c                   |  23 ++--
 mm/vmscan.c                 |  64 +++++----
 7 files changed, 225 insertions(+), 247 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3b99dce85293..e2f8e7caf04b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -32,14 +32,6 @@ enum mem_cgroup_page_stat_item {
 	MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */
 };
 
-extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
-					struct list_head *dst,
-					unsigned long *scanned, int order,
-					isolate_mode_t mode,
-					struct zone *z,
-					struct mem_cgroup *mem_cont,
-					int active, int file);
-
 struct mem_cgroup_reclaim_cookie {
 	struct zone *zone;
 	int priority;
@@ -69,13 +61,14 @@ extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
 
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
-extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru);
-extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru);
-extern void mem_cgroup_rotate_reclaimable_page(struct page *page);
-extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru);
-extern void mem_cgroup_del_lru(struct page *page);
-extern void mem_cgroup_move_lists(struct page *page,
-				  enum lru_list from, enum lru_list to);
+
+struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
+struct lruvec *mem_cgroup_lru_add_list(struct zone *, struct page *,
+				       enum lru_list);
+void mem_cgroup_lru_del_list(struct page *, enum lru_list);
+void mem_cgroup_lru_del(struct page *);
+struct lruvec *mem_cgroup_lru_move_lists(struct zone *, struct page *,
+					 enum lru_list, enum lru_list);
 
 /* For coalescing uncharge for reducing memcg' overhead*/
 extern void mem_cgroup_uncharge_start(void);
@@ -223,33 +216,33 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 }
 
-static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
-{
-}
-
-static inline void mem_cgroup_del_lru_list(struct page *page, int lru)
+static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
+						    struct mem_cgroup *memcg)
 {
-	return ;
+	return &zone->lruvec;
 }
 
-static inline void mem_cgroup_rotate_reclaimable_page(struct page *page)
+static inline struct lruvec *mem_cgroup_lru_add_list(struct zone *zone,
+						     struct page *page,
+						     enum lru_list lru)
 {
-	return ;
+	return &zone->lruvec;
 }
 
-static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru)
+static inline void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 {
-	return ;
 }
 
-static inline void mem_cgroup_del_lru(struct page *page)
+static inline void mem_cgroup_lru_del(struct page *page)
 {
-	return ;
 }
 
-static inline void
-mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to)
+static inline struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
+						       struct page *page,
+						       enum lru_list from,
+						       enum lru_list to)
 {
+	return &zone->lruvec;
 }
 
 static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index e6a7ffe16d31..4e3478e71926 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -21,27 +21,22 @@ static inline int page_is_file_cache(struct page *page)
 	return !PageSwapBacked(page);
 }
 
-static inline void
-__add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l,
-		       struct list_head *head)
-{
-	list_add(&page->lru, head);
-	__mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
-	mem_cgroup_add_lru_list(page, l);
-}
-
 static inline void
 add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
-	__add_page_to_lru_list(zone, page, l, &zone->lruvec.lists[l]);
+	struct lruvec *lruvec;
+
+	lruvec = mem_cgroup_lru_add_list(zone, page, l);
+	list_add(&page->lru, &lruvec->lists[l]);
+	__mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
 }
 
 static inline void
 del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
 {
+	mem_cgroup_lru_del_list(page, l);
 	list_del(&page->lru);
 	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
-	mem_cgroup_del_lru_list(page, l);
 }
 
 /**
@@ -64,7 +59,6 @@ del_page_from_lru(struct zone *zone, struct page *page)
 {
 	enum lru_list l;
 
-	list_del(&page->lru);
 	if (PageUnevictable(page)) {
 		__ClearPageUnevictable(page);
 		l = LRU_UNEVICTABLE;
@@ -75,8 +69,9 @@ del_page_from_lru(struct zone *zone, struct page *page)
 			l += LRU_ACTIVE;
 		}
 	}
+	mem_cgroup_lru_del_list(page, l);
+	list_del(&page->lru);
 	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
-	mem_cgroup_del_lru_list(page, l);
 }
 
 /**
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 961ecc7d30bc..5bae7535c202 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -31,7 +31,6 @@ enum {
 struct page_cgroup {
 	unsigned long flags;
 	struct mem_cgroup *mem_cgroup;
-	struct list_head lru;		/* per cgroup LRU list */
 };
 
 void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6e7f849a1a9e..972878b648c2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -995,6 +995,27 @@ out:
 }
 EXPORT_SYMBOL(mem_cgroup_count_vm_event);
 
+/**
+ * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
+ * @zone: zone of the wanted lruvec
+ * @memcg: memcg of the wanted lruvec
+ *
+ * Returns the lru list vector holding pages for the given @zone and
+ * @memcg.  This can be the global zone lruvec, if the memory controller
+ * is disabled.
+ */
+struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
+				      struct mem_cgroup *memcg)
+{
+	struct mem_cgroup_per_zone *mz;
+
+	if (mem_cgroup_disabled())
+		return &zone->lruvec;
+
+	mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone));
+	return &mz->lruvec;
+}
+
 /*
  * Following LRU functions are allowed to be used without PCG_LOCK.
  * Operations are called by routine of global LRU independently from memcg.
@@ -1009,104 +1030,123 @@ EXPORT_SYMBOL(mem_cgroup_count_vm_event);
  * When moving account, the page is not on LRU. It's isolated.
  */
 
-void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
+/**
+ * mem_cgroup_lru_add_list - account for adding an lru page and return lruvec
+ * @zone: zone of the page
+ * @page: the page
+ * @lru: lru list the page is being added to
+ *
+ * This function accounts for @page being added to @lru, and returns
+ * the lruvec for the given @zone and the memcg @page is charged to.
+ *
+ * The callsite is then responsible for physically linking the page to
+ * the returned lruvec->lists[@lru].
+ */
+struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
+				       enum lru_list lru)
 {
-	struct page_cgroup *pc;
 	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup *memcg;
+	struct page_cgroup *pc;
 
 	if (mem_cgroup_disabled())
-		return;
+		return &zone->lruvec;
+
 	pc = lookup_page_cgroup(page);
-	/* can happen while we handle swapcache. */
-	if (!TestClearPageCgroupAcctLRU(pc))
-		return;
-	VM_BUG_ON(!pc->mem_cgroup);
+	VM_BUG_ON(PageCgroupAcctLRU(pc));
 	/*
-	 * We don't check PCG_USED bit. It's cleared when the "page" is finally
-	 * removed from global LRU.
+	 * putback:				charge:
+	 * SetPageLRU				SetPageCgroupUsed
+	 * smp_mb				smp_mb
+	 * PageCgroupUsed && add to memcg LRU	PageLRU && add to memcg LRU
+	 *
+	 * Ensure that one of the two sides adds the page to the memcg
+	 * LRU during a race.
 	 */
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	/* huge page split is done under lru_lock. so, we have no races. */
-	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
-	VM_BUG_ON(list_empty(&pc->lru));
-	list_del_init(&pc->lru);
-}
-
-void mem_cgroup_del_lru(struct page *page)
-{
-	mem_cgroup_del_lru_list(page, page_lru(page));
+	smp_mb();
+	/*
+	 * If the page is uncharged, it may be freed soon, but it
+	 * could also be swap cache (readahead, swapoff) that needs to
+	 * be reclaimable in the future.  root_mem_cgroup will babysit
+	 * it for the time being.
+	 */
+	if (PageCgroupUsed(pc)) {
+		/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+		smp_rmb();
+		memcg = pc->mem_cgroup;
+		SetPageCgroupAcctLRU(pc);
+	} else
+		memcg = root_mem_cgroup;
+	mz = page_cgroup_zoneinfo(memcg, page);
+	/* compound_order() is stabilized through lru_lock */
+	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
+	return &mz->lruvec;
 }
 
-/*
- * Writeback is about to end against a page which has been marked for immediate
- * reclaim.  If it still appears to be reclaimable, move it to the tail of the
- * inactive list.
+/**
+ * mem_cgroup_lru_del_list - account for removing an lru page
+ * @page: the page
+ * @lru: lru list the page is being removed from
+ *
+ * This function accounts for @page being removed from @lru.
+ *
+ * The callsite is then responsible for physically unlinking
+ * @page->lru.
  */
-void mem_cgroup_rotate_reclaimable_page(struct page *page)
+void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 {
 	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup *memcg;
 	struct page_cgroup *pc;
-	enum lru_list lru = page_lru(page);
 
 	if (mem_cgroup_disabled())
 		return;
 
 	pc = lookup_page_cgroup(page);
-	/* unused page is not rotated. */
-	if (!PageCgroupUsed(pc))
-		return;
-	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-	smp_rmb();
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	list_move_tail(&pc->lru, &mz->lruvec.lists[lru]);
+	/*
+	 * root_mem_cgroup babysits uncharged LRU pages, but
+	 * PageCgroupUsed is cleared when the page is about to get
+	 * freed.  PageCgroupAcctLRU remembers whether the
+	 * LRU-accounting happened against pc->mem_cgroup or
+	 * root_mem_cgroup.
+	 */
+	if (TestClearPageCgroupAcctLRU(pc)) {
+		VM_BUG_ON(!pc->mem_cgroup);
+		memcg = pc->mem_cgroup;
+	} else
+		memcg = root_mem_cgroup;
+	mz = page_cgroup_zoneinfo(memcg, page);
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
 }
 
-void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
+void mem_cgroup_lru_del(struct page *page)
 {
-	struct mem_cgroup_per_zone *mz;
-	struct page_cgroup *pc;
-
-	if (mem_cgroup_disabled())
-		return;
-
-	pc = lookup_page_cgroup(page);
-	/* unused page is not rotated. */
-	if (!PageCgroupUsed(pc))
-		return;
-	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-	smp_rmb();
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	list_move(&pc->lru, &mz->lruvec.lists[lru]);
+	mem_cgroup_lru_del_list(page, page_lru(page));
 }
 
-void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
+/**
+ * mem_cgroup_lru_move_lists - account for moving a page between lrus
+ * @zone: zone of the page
+ * @page: the page
+ * @from: current lru
+ * @to: target lru
+ *
+ * This function accounts for @page being moved between the lrus @from
+ * and @to, and returns the lruvec for the given @zone and the memcg
+ * @page is charged to.
+ *
+ * The callsite is then responsible for physically relinking
+ * @page->lru to the returned lruvec->lists[@to].
+ */
+struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
+					 struct page *page,
+					 enum lru_list from,
+					 enum lru_list to)
 {
-	struct page_cgroup *pc;
-	struct mem_cgroup_per_zone *mz;
-
-	if (mem_cgroup_disabled())
-		return;
-	pc = lookup_page_cgroup(page);
-	VM_BUG_ON(PageCgroupAcctLRU(pc));
-	/*
-	 * putback:				charge:
-	 * SetPageLRU				SetPageCgroupUsed
-	 * smp_mb				smp_mb
-	 * PageCgroupUsed && add to memcg LRU	PageLRU && add to memcg LRU
-	 *
-	 * Ensure that one of the two sides adds the page to the memcg
-	 * LRU during a race.
-	 */
-	smp_mb();
-	if (!PageCgroupUsed(pc))
-		return;
-	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-	smp_rmb();
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	/* huge page split is done under lru_lock. so, we have no races. */
-	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
-	SetPageCgroupAcctLRU(pc);
-	list_add(&pc->lru, &mz->lruvec.lists[lru]);
+	/* XXX: Optimize this, especially for @from == @to */
+	mem_cgroup_lru_del_list(page, from);
+	return mem_cgroup_lru_add_list(zone, page, to);
 }
 
 /*
@@ -1117,6 +1157,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
  */
 static void mem_cgroup_lru_del_before_commit(struct page *page)
 {
+	enum lru_list lru;
 	unsigned long flags;
 	struct zone *zone = page_zone(page);
 	struct page_cgroup *pc = lookup_page_cgroup(page);
@@ -1133,17 +1174,28 @@ static void mem_cgroup_lru_del_before_commit(struct page *page)
 		return;
 
 	spin_lock_irqsave(&zone->lru_lock, flags);
+	lru = page_lru(page);
 	/*
-	 * Forget old LRU when this page_cgroup is *not* used. This Used bit
-	 * is guarded by lock_page() because the page is SwapCache.
+	 * The uncharged page could still be registered to the LRU of
+	 * the stale pc->mem_cgroup.
+	 *
+	 * As pc->mem_cgroup is about to get overwritten, the old LRU
+	 * accounting needs to be taken care of.  Let root_mem_cgroup
+	 * babysit the page until the new memcg is responsible for it.
+	 *
+	 * The PCG_USED bit is guarded by lock_page() as the page is
+	 * swapcache/pagecache.
 	 */
-	if (!PageCgroupUsed(pc))
-		mem_cgroup_del_lru_list(page, page_lru(page));
+	if (PageLRU(page) && PageCgroupAcctLRU(pc) && !PageCgroupUsed(pc)) {
+		del_page_from_lru_list(zone, page, lru);
+		add_page_to_lru_list(zone, page, lru);
+	}
 	spin_unlock_irqrestore(&zone->lru_lock, flags);
 }
 
 static void mem_cgroup_lru_add_after_commit(struct page *page)
 {
+	enum lru_list lru;
 	unsigned long flags;
 	struct zone *zone = page_zone(page);
 	struct page_cgroup *pc = lookup_page_cgroup(page);
@@ -1161,22 +1213,22 @@ static void mem_cgroup_lru_add_after_commit(struct page *page)
 	if (likely(!PageLRU(page)))
 		return;
 	spin_lock_irqsave(&zone->lru_lock, flags);
-	/* link when the page is linked to LRU but page_cgroup isn't */
-	if (PageLRU(page) && !PageCgroupAcctLRU(pc))
-		mem_cgroup_add_lru_list(page, page_lru(page));
+	lru = page_lru(page);
+	/*
+	 * If the page is not on the LRU, someone will soon put it
+	 * there.  If it is, and also already accounted for on the
+	 * memcg-side, it must be on the right lruvec as setting
+	 * pc->mem_cgroup and PageCgroupUsed is properly ordered.
+	 * Otherwise, root_mem_cgroup has been babysitting the page
+	 * during the charge.  Move it to the new memcg now.
+	 */
+	if (PageLRU(page) && !PageCgroupAcctLRU(pc)) {
+		del_page_from_lru_list(zone, page, lru);
+		add_page_to_lru_list(zone, page, lru);
+	}
 	spin_unlock_irqrestore(&zone->lru_lock, flags);
 }
 
-
-void mem_cgroup_move_lists(struct page *page,
-			   enum lru_list from, enum lru_list to)
-{
-	if (mem_cgroup_disabled())
-		return;
-	mem_cgroup_del_lru_list(page, from);
-	mem_cgroup_add_lru_list(page, to);
-}
-
 /*
  * Checks whether given mem is same or in the root_mem_cgroup's
  * hierarchy subtree
@@ -1282,68 +1334,6 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 	return &mz->reclaim_stat;
 }
 
-unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
-					struct list_head *dst,
-					unsigned long *scanned, int order,
-					isolate_mode_t mode,
-					struct zone *z,
-					struct mem_cgroup *mem_cont,
-					int active, int file)
-{
-	unsigned long nr_taken = 0;
-	struct page *page;
-	unsigned long scan;
-	LIST_HEAD(pc_list);
-	struct list_head *src;
-	struct page_cgroup *pc, *tmp;
-	int nid = zone_to_nid(z);
-	int zid = zone_idx(z);
-	struct mem_cgroup_per_zone *mz;
-	int lru = LRU_FILE * file + active;
-	int ret;
-
-	BUG_ON(!mem_cont);
-	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
-	src = &mz->lruvec.lists[lru];
-
-	scan = 0;
-	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
-		if (scan >= nr_to_scan)
-			break;
-
-		if (unlikely(!PageCgroupUsed(pc)))
-			continue;
-
-		page = lookup_cgroup_page(pc);
-
-		if (unlikely(!PageLRU(page)))
-			continue;
-
-		scan++;
-		ret = __isolate_lru_page(page, mode, file);
-		switch (ret) {
-		case 0:
-			list_move(&page->lru, dst);
-			mem_cgroup_del_lru(page);
-			nr_taken += hpage_nr_pages(page);
-			break;
-		case -EBUSY:
-			/* we don't affect global LRU but rotate in our LRU */
-			mem_cgroup_rotate_lru_list(page, page_lru(page));
-			break;
-		default:
-			break;
-		}
-	}
-
-	*scanned = scan;
-
-	trace_mm_vmscan_memcg_isolate(0, nr_to_scan, scan, nr_taken,
-				      0, 0, 0, mode);
-
-	return nr_taken;
-}
-
 #define mem_cgroup_from_res_counter(counter, member)	\
 	container_of(counter, struct mem_cgroup, member)
 
@@ -3726,11 +3716,11 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 				int node, int zid, enum lru_list lru)
 {
-	struct zone *zone;
 	struct mem_cgroup_per_zone *mz;
-	struct page_cgroup *pc, *busy;
 	unsigned long flags, loop;
 	struct list_head *list;
+	struct page *busy;
+	struct zone *zone;
 	int ret = 0;
 
 	zone = &NODE_DATA(node)->node_zones[zid];
@@ -3742,6 +3732,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 	loop += 256;
 	busy = NULL;
 	while (loop--) {
+		struct page_cgroup *pc;
 		struct page *page;
 
 		ret = 0;
@@ -3750,16 +3741,16 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 			spin_unlock_irqrestore(&zone->lru_lock, flags);
 			break;
 		}
-		pc = list_entry(list->prev, struct page_cgroup, lru);
-		if (busy == pc) {
-			list_move(&pc->lru, list);
+		page = list_entry(list->prev, struct page, lru);
+		if (busy == page) {
+			list_move(&page->lru, list);
 			busy = NULL;
 			spin_unlock_irqrestore(&zone->lru_lock, flags);
 			continue;
 		}
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
-		page = lookup_cgroup_page(pc);
+		pc = lookup_page_cgroup(page);
 
 		ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
 		if (ret == -ENOMEM)
@@ -3767,7 +3758,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 
 		if (ret == -EBUSY || ret == -EINVAL) {
 			/* found lock contention or "pc" is obsolete. */
-			busy = pc;
+			busy = page;
 			cond_resched();
 		} else
 			busy = NULL;
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 2d123f94a8df..f59405a8d752 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -16,7 +16,6 @@ static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
 	pc->flags = 0;
 	set_page_cgroup_array_id(pc, id);
 	pc->mem_cgroup = NULL;
-	INIT_LIST_HEAD(&pc->lru);
 }
 static unsigned long total_usage;
 
diff --git a/mm/swap.c b/mm/swap.c
index 76ef79d3857c..126da2919f60 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -232,12 +232,14 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
 static void pagevec_move_tail_fn(struct page *page, void *arg)
 {
 	int *pgmoved = arg;
-	struct zone *zone = page_zone(page);
 
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		enum lru_list lru = page_lru_base_type(page);
-		list_move_tail(&page->lru, &zone->lruvec.lists[lru]);
-		mem_cgroup_rotate_reclaimable_page(page);
+		struct lruvec *lruvec;
+
+		lruvec = mem_cgroup_lru_move_lists(page_zone(page),
+						   page, lru, lru);
+		list_move_tail(&page->lru, &lruvec->lists[lru]);
 		(*pgmoved)++;
 	}
 }
@@ -476,12 +478,13 @@ static void lru_deactivate_fn(struct page *page, void *arg)
 		 */
 		SetPageReclaim(page);
 	} else {
+		struct lruvec *lruvec;
 		/*
 		 * The page's writeback ends up during pagevec
 		 * We moves tha page into tail of inactive.
 		 */
-		list_move_tail(&page->lru, &zone->lruvec.lists[lru]);
-		mem_cgroup_rotate_reclaimable_page(page);
+		lruvec = mem_cgroup_lru_move_lists(zone, page, lru, lru);
+		list_move_tail(&page->lru, &lruvec->lists[lru]);
 		__count_vm_event(PGROTATED);
 	}
 
@@ -663,6 +666,8 @@ void lru_add_page_tail(struct zone* zone,
 	SetPageLRU(page_tail);
 
 	if (page_evictable(page_tail, NULL)) {
+		struct lruvec *lruvec;
+
 		if (PageActive(page)) {
 			SetPageActive(page_tail);
 			active = 1;
@@ -672,11 +677,13 @@ void lru_add_page_tail(struct zone* zone,
 			lru = LRU_INACTIVE_ANON;
 		}
 		update_page_reclaim_stat(zone, page_tail, file, active);
+		lruvec = mem_cgroup_lru_add_list(zone, page_tail, lru);
 		if (likely(PageLRU(page)))
-			__add_page_to_lru_list(zone, page_tail, lru,
-					       page->lru.prev);
+			list_add(&page_tail->lru, page->lru.prev);
 		else
-			add_page_to_lru_list(zone, page_tail, lru);
+			list_add(&page_tail->lru, &lruvec->lists[lru]);
+		__mod_zone_page_state(zone, NR_LRU_BASE + lru,
+				      hpage_nr_pages(page_tail));
 	} else {
 		SetPageUnevictable(page_tail);
 		add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 93cdc44a1693..813aae820a27 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1139,15 +1139,14 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 		switch (__isolate_lru_page(page, mode, file)) {
 		case 0:
+			mem_cgroup_lru_del(page);
 			list_move(&page->lru, dst);
-			mem_cgroup_del_lru(page);
 			nr_taken += hpage_nr_pages(page);
 			break;
 
 		case -EBUSY:
 			/* else it is being freed elsewhere */
 			list_move(&page->lru, src);
-			mem_cgroup_rotate_lru_list(page, page_lru(page));
 			continue;
 
 		default:
@@ -1197,8 +1196,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 				break;
 
 			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
+				mem_cgroup_lru_del(cursor_page);
 				list_move(&cursor_page->lru, dst);
-				mem_cgroup_del_lru(cursor_page);
 				nr_taken += hpage_nr_pages(cursor_page);
 				nr_lumpy_taken++;
 				if (PageDirty(cursor_page))
@@ -1239,18 +1238,20 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	return nr_taken;
 }
 
-static unsigned long isolate_pages_global(unsigned long nr,
-					struct list_head *dst,
-					unsigned long *scanned, int order,
-					isolate_mode_t mode,
-					struct zone *z,	int active, int file)
+static unsigned long isolate_pages(unsigned long nr, struct mem_cgroup_zone *mz,
+				   struct list_head *dst,
+				   unsigned long *scanned, int order,
+				   isolate_mode_t mode, int active, int file)
 {
+	struct lruvec *lruvec;
 	int lru = LRU_BASE;
+
+	lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
 	if (active)
 		lru += LRU_ACTIVE;
 	if (file)
 		lru += LRU_FILE;
-	return isolate_lru_pages(nr, &z->lruvec.lists[lru], dst,
+	return isolate_lru_pages(nr, &lruvec->lists[lru], dst,
 				 scanned, order, mode, file);
 }
 
@@ -1518,14 +1519,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 
 	spin_lock_irq(&zone->lru_lock);
 
-	if (scanning_global_lru(mz)) {
-		nr_taken = isolate_pages_global(nr_to_scan, &page_list,
-			&nr_scanned, sc->order, reclaim_mode, zone, 0, file);
-	} else {
-		nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list,
-			&nr_scanned, sc->order, reclaim_mode, zone,
-			mz->mem_cgroup, 0, file);
-	}
+	nr_taken = isolate_pages(nr_to_scan, mz, &page_list,
+				 &nr_scanned, sc->order,
+				 reclaim_mode, 0, file);
 	if (global_reclaim(sc)) {
 		zone->pages_scanned += nr_scanned;
 		if (current_is_kswapd())
@@ -1625,13 +1621,15 @@ static void move_active_pages_to_lru(struct zone *zone,
 	pagevec_init(&pvec, 1);
 
 	while (!list_empty(list)) {
+		struct lruvec *lruvec;
+
 		page = lru_to_page(list);
 
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
 
-		list_move(&page->lru, &zone->lruvec.lists[lru]);
-		mem_cgroup_add_lru_list(page, lru);
+		lruvec = mem_cgroup_lru_add_list(zone, page, lru);
+		list_move(&page->lru, &lruvec->lists[lru]);
 		pgmoved += hpage_nr_pages(page);
 
 		if (!pagevec_add(&pvec, page) || list_empty(list)) {
@@ -1672,17 +1670,10 @@ static void shrink_active_list(unsigned long nr_pages,
 		reclaim_mode |= ISOLATE_CLEAN;
 
 	spin_lock_irq(&zone->lru_lock);
-	if (scanning_global_lru(mz)) {
-		nr_taken = isolate_pages_global(nr_pages, &l_hold,
-						&pgscanned, sc->order,
-						reclaim_mode, zone,
-						1, file);
-	} else {
-		nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
-						&pgscanned, sc->order,
-						reclaim_mode, zone,
-						mz->mem_cgroup, 1, file);
-	}
+
+	nr_taken = isolate_pages(nr_pages, mz, &l_hold,
+				 &pgscanned, sc->order,
+				 reclaim_mode, 1, file);
 
 	if (global_reclaim(sc))
 		zone->pages_scanned += pgscanned;
@@ -3440,16 +3431,18 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
  */
 static void check_move_unevictable_page(struct page *page, struct zone *zone)
 {
-	VM_BUG_ON(PageActive(page));
+	struct lruvec *lruvec;
 
+	VM_BUG_ON(PageActive(page));
 retry:
 	ClearPageUnevictable(page);
 	if (page_evictable(page, NULL)) {
 		enum lru_list l = page_lru_base_type(page);
 
 		__dec_zone_state(zone, NR_UNEVICTABLE);
-		list_move(&page->lru, &zone->lruvec.lists[l]);
-		mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
+		lruvec = mem_cgroup_lru_move_lists(zone, page,
+						   LRU_UNEVICTABLE, l);
+		list_move(&page->lru, &lruvec->lists[l]);
 		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
 		__count_vm_event(UNEVICTABLE_PGRESCUED);
 	} else {
@@ -3457,8 +3450,9 @@ retry:
 		 * rotate unevictable list
 		 */
 		SetPageUnevictable(page);
-		list_move(&page->lru, &zone->lruvec.lists[LRU_UNEVICTABLE]);
-		mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
+		lruvec = mem_cgroup_lru_move_lists(zone, page, LRU_UNEVICTABLE,
+						   LRU_UNEVICTABLE);
+		list_move(&page->lru, &lruvec->lists[LRU_UNEVICTABLE]);
 		if (page_evictable(page, NULL))
 			goto retry;
 	}
-- 
cgit v1.2.3


From 6b208e3f6e35aa76d254c395bdcd984b17c6b626 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Thu, 12 Jan 2012 17:18:18 -0800
Subject: mm: memcg: remove unused node/section info from pc->flags

To find the page corresponding to a certain page_cgroup, the pc->flags
encoded the node or section ID with the base array to compare the pc
pointer to.

Now that the per-memory cgroup LRU lists link page descriptors directly,
there is no longer any code that knows the struct page_cgroup of a PFN
but not the struct page.

[hughd@google.com: remove unused node/section info from pc->flags fix]
Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ying Han <yinghan@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_cgroup.h | 33 -------------------------
 mm/page_cgroup.c            | 59 ++++++---------------------------------------
 2 files changed, 7 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 5bae7535c202..aaa60da8783c 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -121,39 +121,6 @@ static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
 	local_irq_restore(*flags);
 }
 
-#ifdef CONFIG_SPARSEMEM
-#define PCG_ARRAYID_WIDTH	SECTIONS_SHIFT
-#else
-#define PCG_ARRAYID_WIDTH	NODES_SHIFT
-#endif
-
-#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS)
-#error Not enough space left in pc->flags to store page_cgroup array IDs
-#endif
-
-/* pc->flags: ARRAY-ID | FLAGS */
-
-#define PCG_ARRAYID_MASK	((1UL << PCG_ARRAYID_WIDTH) - 1)
-
-#define PCG_ARRAYID_OFFSET	(BITS_PER_LONG - PCG_ARRAYID_WIDTH)
-/*
- * Zero the shift count for non-existent fields, to prevent compiler
- * warnings and ensure references are optimized away.
- */
-#define PCG_ARRAYID_SHIFT	(PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0))
-
-static inline void set_page_cgroup_array_id(struct page_cgroup *pc,
-					    unsigned long id)
-{
-	pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT);
-	pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT;
-}
-
-static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc)
-{
-	return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK;
-}
-
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct page_cgroup;
 
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index f59405a8d752..f0559e049e00 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -11,12 +11,6 @@
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
 
-static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
-{
-	pc->flags = 0;
-	set_page_cgroup_array_id(pc, id);
-	pc->mem_cgroup = NULL;
-}
 static unsigned long total_usage;
 
 #if !defined(CONFIG_SPARSEMEM)
@@ -41,28 +35,13 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	return base + offset;
 }
 
-struct page *lookup_cgroup_page(struct page_cgroup *pc)
-{
-	unsigned long pfn;
-	struct page *page;
-	pg_data_t *pgdat;
-
-	pgdat = NODE_DATA(page_cgroup_array_id(pc));
-	pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
-	page = pfn_to_page(pfn);
-	VM_BUG_ON(pc != lookup_page_cgroup(page));
-	return page;
-}
-
 static int __init alloc_node_page_cgroup(int nid)
 {
-	struct page_cgroup *base, *pc;
+	struct page_cgroup *base;
 	unsigned long table_size;
-	unsigned long start_pfn, nr_pages, index;
+	unsigned long nr_pages;
 
-	start_pfn = NODE_DATA(nid)->node_start_pfn;
 	nr_pages = NODE_DATA(nid)->node_spanned_pages;
-
 	if (!nr_pages)
 		return 0;
 
@@ -72,10 +51,6 @@ static int __init alloc_node_page_cgroup(int nid)
 			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
 	if (!base)
 		return -ENOMEM;
-	for (index = 0; index < nr_pages; index++) {
-		pc = base + index;
-		init_page_cgroup(pc, nid);
-	}
 	NODE_DATA(nid)->node_page_cgroup = base;
 	total_usage += table_size;
 	return 0;
@@ -116,23 +91,10 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	return section->page_cgroup + pfn;
 }
 
-struct page *lookup_cgroup_page(struct page_cgroup *pc)
-{
-	struct mem_section *section;
-	struct page *page;
-	unsigned long nr;
-
-	nr = page_cgroup_array_id(pc);
-	section = __nr_to_section(nr);
-	page = pfn_to_page(pc - section->page_cgroup);
-	VM_BUG_ON(pc != lookup_page_cgroup(page));
-	return page;
-}
-
 static void *__meminit alloc_page_cgroup(size_t size, int nid)
 {
+	gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN;
 	void *addr = NULL;
-	gfp_t flags = GFP_KERNEL | __GFP_NOWARN;
 
 	addr = alloc_pages_exact_nid(nid, size, flags);
 	if (addr) {
@@ -141,9 +103,9 @@ static void *__meminit alloc_page_cgroup(size_t size, int nid)
 	}
 
 	if (node_state(nid, N_HIGH_MEMORY))
-		addr = vmalloc_node(size, nid);
+		addr = vzalloc_node(size, nid);
 	else
-		addr = vmalloc(size);
+		addr = vzalloc(size);
 
 	return addr;
 }
@@ -166,14 +128,11 @@ static void free_page_cgroup(void *addr)
 
 static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
 {
-	struct page_cgroup *base, *pc;
 	struct mem_section *section;
+	struct page_cgroup *base;
 	unsigned long table_size;
-	unsigned long nr;
-	int index;
 
-	nr = pfn_to_section_nr(pfn);
-	section = __nr_to_section(nr);
+	section = __pfn_to_section(pfn);
 
 	if (section->page_cgroup)
 		return 0;
@@ -193,10 +152,6 @@ static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
 		return -ENOMEM;
 	}
 
-	for (index = 0; index < PAGES_PER_SECTION; index++) {
-		pc = base + index;
-		init_page_cgroup(pc, nr);
-	}
 	/*
 	 * The passed "pfn" may not be aligned to SECTION.  For the calculation
 	 * we need to apply a mask.
-- 
cgit v1.2.3


From e94c8a9cbce1aee4af9e1285802785481b7f93c5 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 12 Jan 2012 17:18:20 -0800
Subject: memcg: make mem_cgroup_split_huge_fixup() more efficient

In split_huge_page(), mem_cgroup_split_huge_fixup() is called to handle
page_cgroup modifications.  It takes move_lock_page_cgroup(), modifies
page_cgroup and does LRU accounting, and is called HPAGE_PMD_NR - 1 times.

But thinking again,
  - compound_lock() is held at move_account(), so it's not necessary
    to take move_lock_page_cgroup().
  - LRU is locked and all tail pages will go into the same LRU as the
    head is now on.
  - page_cgroup is contiguous in huge page range.

This patch changes mem_cgroup_split_huge_fixup() so that it is called once
per hugepage, reducing the cost of splitting.

[akpm@linux-foundation.org: fix typo, per Michal]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  5 ++---
 mm/huge_memory.c           |  3 ++-
 mm/memcontrol.c            | 34 +++++++++++++++++-----------------
 3 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e2f8e7caf04b..cee3761666f0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -163,7 +163,7 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg);
 
 void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail);
+void mem_cgroup_split_huge_fixup(struct page *head);
 #endif
 
 #ifdef CONFIG_DEBUG_VM
@@ -379,8 +379,7 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 	return 0;
 }
 
-static inline void mem_cgroup_split_huge_fixup(struct page *head,
-						struct page *tail)
+static inline void mem_cgroup_split_huge_fixup(struct page *head)
 {
 }
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 36b3d988b4ef..db522e160cca 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1207,6 +1207,8 @@ static void __split_huge_page_refcount(struct page *page)
 	/* prevent PageLRU to go away from under us, and freeze lru stats */
 	spin_lock_irq(&zone->lru_lock);
 	compound_lock(page);
+	/* complete memcg works before add pages to LRU */
+	mem_cgroup_split_huge_fixup(page);
 
 	for (i = 1; i < HPAGE_PMD_NR; i++) {
 		struct page *page_tail = page + i;
@@ -1278,7 +1280,6 @@ static void __split_huge_page_refcount(struct page *page)
 		BUG_ON(!PageDirty(page_tail));
 		BUG_ON(!PageSwapBacked(page_tail));
 
-		mem_cgroup_split_huge_fixup(page, page_tail);
 
 		lru_add_page_tail(zone, page, page_tail);
 	}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 972878b648c2..42174612cc0b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2553,39 +2553,39 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 			(1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
 /*
  * Because tail pages are not marked as "used", set it. We're under
- * zone->lru_lock, 'splitting on pmd' and compund_lock.
+ * zone->lru_lock, 'splitting on pmd' and compound_lock.
+ * charge/uncharge will be never happen and move_account() is done under
+ * compound_lock(), so we don't have to take care of races.
  */
-void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
+void mem_cgroup_split_huge_fixup(struct page *head)
 {
 	struct page_cgroup *head_pc = lookup_page_cgroup(head);
-	struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
-	unsigned long flags;
+	struct page_cgroup *pc;
+	int i;
 
 	if (mem_cgroup_disabled())
 		return;
-	/*
-	 * We have no races with charge/uncharge but will have races with
-	 * page state accounting.
-	 */
-	move_lock_page_cgroup(head_pc, &flags);
+	for (i = 1; i < HPAGE_PMD_NR; i++) {
+		pc = head_pc + i;
+		pc->mem_cgroup = head_pc->mem_cgroup;
+		smp_wmb();/* see __commit_charge() */
+		/*
+		 * LRU flags cannot be copied because we need to add tail
+		 * page to LRU by generic call and our hooks will be called.
+		 */
+		pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+	}
 
-	tail_pc->mem_cgroup = head_pc->mem_cgroup;
-	smp_wmb(); /* see __commit_charge() */
 	if (PageCgroupAcctLRU(head_pc)) {
 		enum lru_list lru;
 		struct mem_cgroup_per_zone *mz;
-
 		/*
-		 * LRU flags cannot be copied because we need to add tail
-		 *.page to LRU by generic call and our hook will be called.
 		 * We hold lru_lock, then, reduce counter directly.
 		 */
 		lru = page_lru(head);
 		mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
-		MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+		MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
 	}
-	tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
-	move_unlock_page_cgroup(head_pc, &flags);
 }
 #endif
 
-- 
cgit v1.2.3


From 72835c86ca15d0126354b73d5f29ce9194931c9b Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jweiner@redhat.com>
Date: Thu, 12 Jan 2012 17:18:32 -0800
Subject: mm: unify remaining mem_cont, mem, etc. variable names to memcg

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 16 +++++++-------
 include/linux/oom.h        |  2 +-
 include/linux/rmap.h       |  4 ++--
 mm/memcontrol.c            | 52 ++++++++++++++++++++++++----------------------
 mm/oom_kill.c              | 38 ++++++++++++++++-----------------
 mm/rmap.c                  | 20 +++++++++---------
 mm/swapfile.c              |  9 ++++----
 mm/vmscan.c                | 12 +++++------
 8 files changed, 78 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index cee3761666f0..b80de520670b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,10 +54,10 @@ extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
 /* for swap handling */
 extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-		struct page *page, gfp_t mask, struct mem_cgroup **ptr);
+		struct page *page, gfp_t mask, struct mem_cgroup **memcgp);
 extern void mem_cgroup_commit_charge_swapin(struct page *page,
-					struct mem_cgroup *ptr);
-extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
+					struct mem_cgroup *memcg);
+extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg);
 
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
@@ -101,7 +101,7 @@ extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
 
 extern int
 mem_cgroup_prepare_migration(struct page *page,
-	struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask);
+	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask);
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	struct page *oldpage, struct page *newpage, bool migration_ok);
 
@@ -186,17 +186,17 @@ static inline int mem_cgroup_cache_charge(struct page *page,
 }
 
 static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-		struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr)
+		struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp)
 {
 	return 0;
 }
 
 static inline void mem_cgroup_commit_charge_swapin(struct page *page,
-					  struct mem_cgroup *ptr)
+					  struct mem_cgroup *memcg)
 {
 }
 
-static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr)
+static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
 {
 }
 
@@ -275,7 +275,7 @@ static inline struct cgroup_subsys_state
 
 static inline int
 mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
-	struct mem_cgroup **ptr, gfp_t gfp_mask)
+	struct mem_cgroup **memcgp, gfp_t gfp_mask)
 {
 	return 0;
 }
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 6f9d04a85336..552fba9c7d5a 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -43,7 +43,7 @@ enum oom_constraint {
 extern void compare_swap_oom_score_adj(int old_val, int new_val);
 extern int test_set_oom_score_adj(int new_val);
 
-extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
+extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 			const nodemask_t *nodemask, unsigned long totalpages);
 extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 1afb9954bbf1..1cdd62a2788a 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -158,7 +158,7 @@ static inline void page_dup_rmap(struct page *page)
  * Called from mm/vmscan.c to handle paging out
  */
 int page_referenced(struct page *, int is_locked,
-			struct mem_cgroup *cnt, unsigned long *vm_flags);
+			struct mem_cgroup *memcg, unsigned long *vm_flags);
 int page_referenced_one(struct page *, struct vm_area_struct *,
 	unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
 
@@ -236,7 +236,7 @@ int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
 #define anon_vma_link(vma)	do {} while (0)
 
 static inline int page_referenced(struct page *page, int is_locked,
-				  struct mem_cgroup *cnt,
+				  struct mem_cgroup *memcg,
 				  unsigned long *vm_flags)
 {
 	*vm_flags = 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index abb66a2cba65..aeb23933a052 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2844,12 +2844,12 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
  */
 int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 				 struct page *page,
-				 gfp_t mask, struct mem_cgroup **ptr)
+				 gfp_t mask, struct mem_cgroup **memcgp)
 {
 	struct mem_cgroup *memcg;
 	int ret;
 
-	*ptr = NULL;
+	*memcgp = NULL;
 
 	if (mem_cgroup_disabled())
 		return 0;
@@ -2867,27 +2867,27 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 	memcg = try_get_mem_cgroup_from_page(page);
 	if (!memcg)
 		goto charge_cur_mm;
-	*ptr = memcg;
-	ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true);
+	*memcgp = memcg;
+	ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
 	css_put(&memcg->css);
 	return ret;
 charge_cur_mm:
 	if (unlikely(!mm))
 		mm = &init_mm;
-	return __mem_cgroup_try_charge(mm, mask, 1, ptr, true);
+	return __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
 }
 
 static void
-__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
+__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
 					enum charge_type ctype)
 {
 	if (mem_cgroup_disabled())
 		return;
-	if (!ptr)
+	if (!memcg)
 		return;
-	cgroup_exclude_rmdir(&ptr->css);
+	cgroup_exclude_rmdir(&memcg->css);
 
-	__mem_cgroup_commit_charge_lrucare(page, ptr, ctype);
+	__mem_cgroup_commit_charge_lrucare(page, memcg, ctype);
 	/*
 	 * Now swap is on-memory. This means this page may be
 	 * counted both as mem and swap....double count.
@@ -2897,21 +2897,22 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 	 */
 	if (do_swap_account && PageSwapCache(page)) {
 		swp_entry_t ent = {.val = page_private(page)};
+		struct mem_cgroup *swap_memcg;
 		unsigned short id;
-		struct mem_cgroup *memcg;
 
 		id = swap_cgroup_record(ent, 0);
 		rcu_read_lock();
-		memcg = mem_cgroup_lookup(id);
-		if (memcg) {
+		swap_memcg = mem_cgroup_lookup(id);
+		if (swap_memcg) {
 			/*
 			 * This recorded memcg can be obsolete one. So, avoid
 			 * calling css_tryget
 			 */
-			if (!mem_cgroup_is_root(memcg))
-				res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
-			mem_cgroup_swap_statistics(memcg, false);
-			mem_cgroup_put(memcg);
+			if (!mem_cgroup_is_root(swap_memcg))
+				res_counter_uncharge(&swap_memcg->memsw,
+						     PAGE_SIZE);
+			mem_cgroup_swap_statistics(swap_memcg, false);
+			mem_cgroup_put(swap_memcg);
 		}
 		rcu_read_unlock();
 	}
@@ -2920,13 +2921,14 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
 	 * So, rmdir()->pre_destroy() can be called while we do this charge.
 	 * In that case, we need to call pre_destroy() again. check it here.
 	 */
-	cgroup_release_and_wakeup_rmdir(&ptr->css);
+	cgroup_release_and_wakeup_rmdir(&memcg->css);
 }
 
-void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
+void mem_cgroup_commit_charge_swapin(struct page *page,
+				     struct mem_cgroup *memcg)
 {
-	__mem_cgroup_commit_charge_swapin(page, ptr,
-					MEM_CGROUP_CHARGE_TYPE_MAPPED);
+	__mem_cgroup_commit_charge_swapin(page, memcg,
+					  MEM_CGROUP_CHARGE_TYPE_MAPPED);
 }
 
 void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
@@ -3255,14 +3257,14 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
  * page belongs to.
  */
 int mem_cgroup_prepare_migration(struct page *page,
-	struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask)
+	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask)
 {
 	struct mem_cgroup *memcg = NULL;
 	struct page_cgroup *pc;
 	enum charge_type ctype;
 	int ret = 0;
 
-	*ptr = NULL;
+	*memcgp = NULL;
 
 	VM_BUG_ON(PageTransHuge(page));
 	if (mem_cgroup_disabled())
@@ -3313,10 +3315,10 @@ int mem_cgroup_prepare_migration(struct page *page,
 	if (!memcg)
 		return 0;
 
-	*ptr = memcg;
-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false);
+	*memcgp = memcg;
+	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
 	css_put(&memcg->css);/* drop extra refcnt */
-	if (ret || *ptr == NULL) {
+	if (ret || *memcgp == NULL) {
 		if (PageAnon(page)) {
 			lock_page_cgroup(pc);
 			ClearPageCgroupMigration(pc);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 05c0f27d4ed1..2958fd8e7c9a 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -152,7 +152,7 @@ struct task_struct *find_lock_task_mm(struct task_struct *p)
 
 /* return true if the task is not adequate as candidate victim task. */
 static bool oom_unkillable_task(struct task_struct *p,
-		const struct mem_cgroup *mem, const nodemask_t *nodemask)
+		const struct mem_cgroup *memcg, const nodemask_t *nodemask)
 {
 	if (is_global_init(p))
 		return true;
@@ -160,7 +160,7 @@ static bool oom_unkillable_task(struct task_struct *p,
 		return true;
 
 	/* When mem_cgroup_out_of_memory() and p is not member of the group */
-	if (mem && !task_in_mem_cgroup(p, mem))
+	if (memcg && !task_in_mem_cgroup(p, memcg))
 		return true;
 
 	/* p may not have freeable memory in nodemask */
@@ -179,12 +179,12 @@ static bool oom_unkillable_task(struct task_struct *p,
  * predictable as possible.  The goal is to return the highest value for the
  * task consuming the most memory to avoid subsequent oom failures.
  */
-unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
+unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 		      const nodemask_t *nodemask, unsigned long totalpages)
 {
 	long points;
 
-	if (oom_unkillable_task(p, mem, nodemask))
+	if (oom_unkillable_task(p, memcg, nodemask))
 		return 0;
 
 	p = find_lock_task_mm(p);
@@ -308,7 +308,7 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
  * (not docbooked, we don't want this one cluttering up the manual)
  */
 static struct task_struct *select_bad_process(unsigned int *ppoints,
-		unsigned long totalpages, struct mem_cgroup *mem,
+		unsigned long totalpages, struct mem_cgroup *memcg,
 		const nodemask_t *nodemask)
 {
 	struct task_struct *g, *p;
@@ -320,7 +320,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 
 		if (p->exit_state)
 			continue;
-		if (oom_unkillable_task(p, mem, nodemask))
+		if (oom_unkillable_task(p, memcg, nodemask))
 			continue;
 
 		/*
@@ -364,7 +364,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 			}
 		}
 
-		points = oom_badness(p, mem, nodemask, totalpages);
+		points = oom_badness(p, memcg, nodemask, totalpages);
 		if (points > *ppoints) {
 			chosen = p;
 			*ppoints = points;
@@ -387,14 +387,14 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
  *
  * Call with tasklist_lock read-locked.
  */
-static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask)
+static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask)
 {
 	struct task_struct *p;
 	struct task_struct *task;
 
 	pr_info("[ pid ]   uid  tgid total_vm      rss cpu oom_adj oom_score_adj name\n");
 	for_each_process(p) {
-		if (oom_unkillable_task(p, mem, nodemask))
+		if (oom_unkillable_task(p, memcg, nodemask))
 			continue;
 
 		task = find_lock_task_mm(p);
@@ -417,7 +417,7 @@ static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask)
 }
 
 static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
-			struct mem_cgroup *mem, const nodemask_t *nodemask)
+			struct mem_cgroup *memcg, const nodemask_t *nodemask)
 {
 	task_lock(current);
 	pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
@@ -427,10 +427,10 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 	cpuset_print_task_mems_allowed(current);
 	task_unlock(current);
 	dump_stack();
-	mem_cgroup_print_oom_info(mem, p);
+	mem_cgroup_print_oom_info(memcg, p);
 	show_mem(SHOW_MEM_FILTER_NODES);
 	if (sysctl_oom_dump_tasks)
-		dump_tasks(mem, nodemask);
+		dump_tasks(memcg, nodemask);
 }
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
@@ -484,7 +484,7 @@ static int oom_kill_task(struct task_struct *p)
 
 static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			    unsigned int points, unsigned long totalpages,
-			    struct mem_cgroup *mem, nodemask_t *nodemask,
+			    struct mem_cgroup *memcg, nodemask_t *nodemask,
 			    const char *message)
 {
 	struct task_struct *victim = p;
@@ -493,7 +493,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	unsigned int victim_points = 0;
 
 	if (printk_ratelimit())
-		dump_header(p, gfp_mask, order, mem, nodemask);
+		dump_header(p, gfp_mask, order, memcg, nodemask);
 
 	/*
 	 * If the task is already exiting, don't alarm the sysadmin or kill
@@ -524,7 +524,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			/*
 			 * oom_badness() returns 0 if the thread is unkillable
 			 */
-			child_points = oom_badness(child, mem, nodemask,
+			child_points = oom_badness(child, memcg, nodemask,
 								totalpages);
 			if (child_points > victim_points) {
 				victim = child;
@@ -561,7 +561,7 @@ static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
 }
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
-void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
+void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask)
 {
 	unsigned long limit;
 	unsigned int points = 0;
@@ -578,14 +578,14 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
 	}
 
 	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0, NULL);
-	limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT;
+	limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT;
 	read_lock(&tasklist_lock);
 retry:
-	p = select_bad_process(&points, limit, mem, NULL);
+	p = select_bad_process(&points, limit, memcg, NULL);
 	if (!p || PTR_ERR(p) == -1UL)
 		goto out;
 
-	if (oom_kill_process(p, gfp_mask, 0, points, limit, mem, NULL,
+	if (oom_kill_process(p, gfp_mask, 0, points, limit, memcg, NULL,
 				"Memory cgroup out of memory"))
 		goto retry;
 out:
diff --git a/mm/rmap.c b/mm/rmap.c
index a2e5ce1fa081..c8454e06b6c8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -773,7 +773,7 @@ out:
 }
 
 static int page_referenced_anon(struct page *page,
-				struct mem_cgroup *mem_cont,
+				struct mem_cgroup *memcg,
 				unsigned long *vm_flags)
 {
 	unsigned int mapcount;
@@ -796,7 +796,7 @@ static int page_referenced_anon(struct page *page,
 		 * counting on behalf of references from different
 		 * cgroups
 		 */
-		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
+		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
 			continue;
 		referenced += page_referenced_one(page, vma, address,
 						  &mapcount, vm_flags);
@@ -811,7 +811,7 @@ static int page_referenced_anon(struct page *page,
 /**
  * page_referenced_file - referenced check for object-based rmap
  * @page: the page we're checking references on.
- * @mem_cont: target memory controller
+ * @memcg: target memory control group
  * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
  *
  * For an object-based mapped page, find all the places it is mapped and
@@ -822,7 +822,7 @@ static int page_referenced_anon(struct page *page,
  * This function is only called from page_referenced for object-based pages.
  */
 static int page_referenced_file(struct page *page,
-				struct mem_cgroup *mem_cont,
+				struct mem_cgroup *memcg,
 				unsigned long *vm_flags)
 {
 	unsigned int mapcount;
@@ -864,7 +864,7 @@ static int page_referenced_file(struct page *page,
 		 * counting on behalf of references from different
 		 * cgroups
 		 */
-		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
+		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
 			continue;
 		referenced += page_referenced_one(page, vma, address,
 						  &mapcount, vm_flags);
@@ -880,7 +880,7 @@ static int page_referenced_file(struct page *page,
  * page_referenced - test if the page was referenced
  * @page: the page to test
  * @is_locked: caller holds lock on the page
- * @mem_cont: target memory controller
+ * @memcg: target memory cgroup
  * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
  *
  * Quick test_and_clear_referenced for all mappings to a page,
@@ -888,7 +888,7 @@ static int page_referenced_file(struct page *page,
  */
 int page_referenced(struct page *page,
 		    int is_locked,
-		    struct mem_cgroup *mem_cont,
+		    struct mem_cgroup *memcg,
 		    unsigned long *vm_flags)
 {
 	int referenced = 0;
@@ -904,13 +904,13 @@ int page_referenced(struct page *page,
 			}
 		}
 		if (unlikely(PageKsm(page)))
-			referenced += page_referenced_ksm(page, mem_cont,
+			referenced += page_referenced_ksm(page, memcg,
 								vm_flags);
 		else if (PageAnon(page))
-			referenced += page_referenced_anon(page, mem_cont,
+			referenced += page_referenced_anon(page, memcg,
 								vm_flags);
 		else if (page->mapping)
-			referenced += page_referenced_file(page, mem_cont,
+			referenced += page_referenced_file(page, memcg,
 								vm_flags);
 		if (we_locked)
 			unlock_page(page);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 9520592d4231..d999f090dfda 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -847,12 +847,13 @@ unsigned int count_swap_pages(int type, int free)
 static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, swp_entry_t entry, struct page *page)
 {
-	struct mem_cgroup *ptr;
+	struct mem_cgroup *memcg;
 	spinlock_t *ptl;
 	pte_t *pte;
 	int ret = 1;
 
-	if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) {
+	if (mem_cgroup_try_charge_swapin(vma->vm_mm, page,
+					 GFP_KERNEL, &memcg)) {
 		ret = -ENOMEM;
 		goto out_nolock;
 	}
@@ -860,7 +861,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
 		if (ret > 0)
-			mem_cgroup_cancel_charge_swapin(ptr);
+			mem_cgroup_cancel_charge_swapin(memcg);
 		ret = 0;
 		goto out;
 	}
@@ -871,7 +872,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	set_pte_at(vma->vm_mm, addr, pte,
 		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
 	page_add_anon_rmap(page, vma, addr);
-	mem_cgroup_commit_charge_swapin(page, ptr);
+	mem_cgroup_commit_charge_swapin(page, memcg);
 	swap_free(entry);
 	/*
 	 * Move the page to the active list so it is not
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 813aae820a27..e16ca8384ef7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2376,7 +2376,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
-unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
 						gfp_t gfp_mask, bool noswap,
 						struct zone *zone,
 						unsigned long *nr_scanned)
@@ -2388,10 +2388,10 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 		.may_unmap = 1,
 		.may_swap = !noswap,
 		.order = 0,
-		.target_mem_cgroup = mem,
+		.target_mem_cgroup = memcg,
 	};
 	struct mem_cgroup_zone mz = {
-		.mem_cgroup = mem,
+		.mem_cgroup = memcg,
 		.zone = zone,
 	};
 
@@ -2417,7 +2417,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 	return sc.nr_reclaimed;
 }
 
-unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
+unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 					   gfp_t gfp_mask,
 					   bool noswap)
 {
@@ -2430,7 +2430,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 		.may_swap = !noswap,
 		.nr_to_reclaim = SWAP_CLUSTER_MAX,
 		.order = 0,
-		.target_mem_cgroup = mem_cont,
+		.target_mem_cgroup = memcg,
 		.nodemask = NULL, /* we don't care the placement */
 		.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 				(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
@@ -2444,7 +2444,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 	 * take care of from where we get pages. So the node where we start the
 	 * scan does not need to be the current node.
 	 */
-	nid = mem_cgroup_select_victim_node(mem_cont);
+	nid = mem_cgroup_select_victim_node(memcg);
 
 	zonelist = NODE_DATA(nid)->node_zonelists;
 
-- 
cgit v1.2.3


From 9fb4b7cc0724f178d4b24a2a566ea1e7cb120b82 Mon Sep 17 00:00:00 2001
From: Bob Liu <lliubbo@gmail.com>
Date: Thu, 12 Jan 2012 17:18:48 -0800
Subject: page_cgroup: add helper function to get swap_cgroup

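There are multiple places which need to get the swap_cgroup address, so
add a helper function:

  static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
                                struct swap_cgroup_ctrl **ctrlp);

to simplify the code.  The exported function that returns the id is
renamed from lookup_swap_cgroup() to lookup_swap_cgroup_id().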

Signed-off-by: Bob Liu <lliubbo@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_cgroup.h |  4 ++--
 mm/memcontrol.c             |  4 ++--
 mm/page_cgroup.c            | 56 ++++++++++++++++-----------------------------
 3 files changed, 24 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index aaa60da8783c..1153095ee457 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -149,7 +149,7 @@ static inline void __init page_cgroup_init_flatmem(void)
 extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
 					unsigned short old, unsigned short new);
 extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
-extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
+extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
 extern int swap_cgroup_swapon(int type, unsigned long max_pages);
 extern void swap_cgroup_swapoff(int type);
 #else
@@ -161,7 +161,7 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 }
 
 static inline
-unsigned short lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
 {
 	return 0;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 71a9774e6ead..4c53e971749e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2474,7 +2474,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 			memcg = NULL;
 	} else if (PageSwapCache(page)) {
 		ent.val = page_private(page);
-		id = lookup_swap_cgroup(ent);
+		id = lookup_swap_cgroup_id(ent);
 		rcu_read_lock();
 		memcg = mem_cgroup_lookup(id);
 		if (memcg && !css_tryget(&memcg->css))
@@ -5264,7 +5264,7 @@ static int is_target_pte_for_mc(struct vm_area_struct *vma,
 	}
 	/* There is a swap entry and a page doesn't exist or isn't charged */
 	if (ent.val && !ret &&
-			css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
+			css_id(&mc.from->css) == lookup_swap_cgroup_id(ent)) {
 		ret = MC_TARGET_SWAP;
 		if (target)
 			target->ent = ent;
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index e910524e5a08..b99d19edf89b 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -334,7 +334,6 @@ struct swap_cgroup {
 	unsigned short		id;
 };
 #define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
-#define SC_POS_MASK	(SC_PER_PAGE - 1)
 
 /*
  * SwapCgroup implements "lookup" and "exchange" operations.
@@ -376,6 +375,21 @@ not_enough_page:
 	return -ENOMEM;
 }
 
+static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
+					struct swap_cgroup_ctrl **ctrlp)
+{
+	pgoff_t offset = swp_offset(ent);
+	struct swap_cgroup_ctrl *ctrl;
+	struct page *mappage;
+
+	ctrl = &swap_cgroup_ctrl[swp_type(ent)];
+	if (ctrlp)
+		*ctrlp = ctrl;
+
+	mappage = ctrl->map[offset / SC_PER_PAGE];
+	return page_address(mappage) + offset % SC_PER_PAGE;
+}
+
 /**
  * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
  * @end: swap entry to be cmpxchged
@@ -388,21 +402,13 @@ not_enough_page:
 unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
 					unsigned short old, unsigned short new)
 {
-	int type = swp_type(ent);
-	unsigned long offset = swp_offset(ent);
-	unsigned long idx = offset / SC_PER_PAGE;
-	unsigned long pos = offset & SC_POS_MASK;
 	struct swap_cgroup_ctrl *ctrl;
-	struct page *mappage;
 	struct swap_cgroup *sc;
 	unsigned long flags;
 	unsigned short retval;
 
-	ctrl = &swap_cgroup_ctrl[type];
+	sc = lookup_swap_cgroup(ent, &ctrl);
 
-	mappage = ctrl->map[idx];
-	sc = page_address(mappage);
-	sc += pos;
 	spin_lock_irqsave(&ctrl->lock, flags);
 	retval = sc->id;
 	if (retval == old)
@@ -423,21 +429,13 @@ unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
  */
 unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 {
-	int type = swp_type(ent);
-	unsigned long offset = swp_offset(ent);
-	unsigned long idx = offset / SC_PER_PAGE;
-	unsigned long pos = offset & SC_POS_MASK;
 	struct swap_cgroup_ctrl *ctrl;
-	struct page *mappage;
 	struct swap_cgroup *sc;
 	unsigned short old;
 	unsigned long flags;
 
-	ctrl = &swap_cgroup_ctrl[type];
+	sc = lookup_swap_cgroup(ent, &ctrl);
 
-	mappage = ctrl->map[idx];
-	sc = page_address(mappage);
-	sc += pos;
 	spin_lock_irqsave(&ctrl->lock, flags);
 	old = sc->id;
 	sc->id = id;
@@ -447,28 +445,14 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 }
 
 /**
- * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
+ * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry
  * @ent: swap entry to be looked up.
  *
  * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
  */
-unsigned short lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
 {
-	int type = swp_type(ent);
-	unsigned long offset = swp_offset(ent);
-	unsigned long idx = offset / SC_PER_PAGE;
-	unsigned long pos = offset & SC_POS_MASK;
-	struct swap_cgroup_ctrl *ctrl;
-	struct page *mappage;
-	struct swap_cgroup *sc;
-	unsigned short ret;
-
-	ctrl = &swap_cgroup_ctrl[type];
-	mappage = ctrl->map[idx];
-	sc = page_address(mappage);
-	sc += pos;
-	ret = sc->id;
-	return ret;
+	return lookup_swap_cgroup(ent, NULL)->id;
 }
 
 int swap_cgroup_swapon(int type, unsigned long max_pages)
-- 
cgit v1.2.3


From 4e5f01c2b9b94321992acb09c35d34f5ee5bb274 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 12 Jan 2012 17:18:58 -0800
Subject: memcg: clear pc->mem_cgroup if necessary.

This is a preparation before removing a flag PCG_ACCT_LRU in page_cgroup
and reducing atomic ops/complexity in memcg LRU handling.

In some cases, pages are added to the LRU before being charged to a
memcg, so they are not classified to a memory cgroup at LRU addition.
Currently, the LRU to which the page should be added is determined by a
bit in page_cgroup->flags and by pc->mem_cgroup.  I'd like to remove the
check of the flag.

To handle the case where pc->mem_cgroup may contain a stale pointer
because a page is added to the LRU before classification, this patch
resets pc->mem_cgroup to root_mem_cgroup before LRU additions.

[akpm@linux-foundation.org: fix CONFIG_CGROUP_MEM_CONT=n build]
[hughd@google.com: fix CONFIG_CGROUP_MEM_RES_CTLR=y CONFIG_CGROUP_MEM_RES_CTLR_SWAP=n build]
[akpm@linux-foundation.org: ksm.c needs memcontrol.h, per Michal]
[hughd@google.com: stop oops in mem_cgroup_reset_owner()]
[hughd@google.com: fix page migration to reset_owner]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  5 +++++
 mm/ksm.c                   | 11 +++++++++++
 mm/memcontrol.c            | 17 +++++++++++++++++
 mm/migrate.c               |  2 ++
 mm/swap_state.c            | 10 ++++++++++
 5 files changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b80de520670b..4d34356fe644 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -129,6 +129,7 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern void mem_cgroup_replace_page_cache(struct page *oldpage,
 					struct page *newpage);
 
+extern void mem_cgroup_reset_owner(struct page *page);
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 extern int do_swap_account;
 #endif
@@ -391,6 +392,10 @@ static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
 				struct page *newpage)
 {
 }
+
+static inline void mem_cgroup_reset_owner(struct page *page)
+{
+}
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)
diff --git a/mm/ksm.c b/mm/ksm.c
index 310544a379ae..1925ffbfb27f 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -28,6 +28,7 @@
 #include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/slab.h>
+#include <linux/memcontrol.h>
 #include <linux/rbtree.h>
 #include <linux/memory.h>
 #include <linux/mmu_notifier.h>
@@ -1571,6 +1572,16 @@ struct page *ksm_does_need_to_copy(struct page *page,
 
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 	if (new_page) {
+		/*
+		 * The memcg-specific accounting when moving
+		 * pages around the LRU lists relies on the
+		 * page's owner (memcg) to be valid.  Usually,
+		 * pages are assigned to a new owner before
+		 * being put on the LRU list, but since this
+		 * is not the case here, the stale owner from
+		 * a previous allocation cycle must be reset.
+		 */
+		mem_cgroup_reset_owner(new_page);
 		copy_user_highpage(new_page, page, address, vma);
 
 		SetPageDirty(new_page);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d58bb5fa4403..c74102d6eb5a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3050,6 +3050,23 @@ void mem_cgroup_uncharge_end(void)
 	batch->memcg = NULL;
 }
 
+/*
+ * A function for resetting pc->mem_cgroup for newly allocated pages.
+ * This function should be called if the newpage will be added to LRU
+ * before start accounting.
+ */
+void mem_cgroup_reset_owner(struct page *newpage)
+{
+	struct page_cgroup *pc;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	pc = lookup_page_cgroup(newpage);
+	VM_BUG_ON(PageCgroupUsed(pc));
+	pc->mem_cgroup = root_mem_cgroup;
+}
+
 #ifdef CONFIG_SWAP
 /*
  * called after __delete_from_swap_cache() and drop "page" account.
diff --git a/mm/migrate.c b/mm/migrate.c
index 89ea0854332e..fc391985899f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -777,6 +777,8 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	if (!newpage)
 		return -ENOMEM;
 
+	mem_cgroup_reset_owner(newpage);
+
 	if (page_count(page) == 1) {
 		/* page was freed from under us. So we are done. */
 		goto out;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index ea6b32d61873..470038a91873 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -300,6 +300,16 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			new_page = alloc_page_vma(gfp_mask, vma, addr);
 			if (!new_page)
 				break;		/* Out of memory */
+			/*
+			 * The memcg-specific accounting when moving
+			 * pages around the LRU lists relies on the
+			 * page's owner (memcg) to be valid.  Usually,
+			 * pages are assigned to a new owner before
+			 * being put on the LRU list, but since this
+			 * is not the case here, the stale owner from
+			 * a previous allocation cycle must be reset.
+			 */
+			mem_cgroup_reset_owner(new_page);
 		}
 
 		/*
-- 
cgit v1.2.3


From 38c5d72f3ebe5ddd57d2f08dc035070fc6c9a287 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 12 Jan 2012 17:19:01 -0800
Subject: memcg: simplify LRU handling by new rule

Currently, during LRU handling, the memory cgroup code has to do
complicated work to see a valid pc->mem_cgroup, which may be overwritten.

This patch relaxes the protocol.  It guarantees that
   - when pc->mem_cgroup is overwritten, the page must not be on the LRU.

With this, the LRU routines can trust pc->mem_cgroup and don't need to
check bits in pc->flags.  This new rule may add a small overhead to
swapin, but in most cases LRU handling gets faster.

After this patch, PCG_ACCT_LRU bit is obsolete and removed.

[akpm@linux-foundation.org: remove unneeded VM_BUG_ON(), restore hannes's christmas tree]
[akpm@linux-foundation.org: clean up code comment]
[hughd@google.com: fix NULL mem_cgroup_try_charge]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_cgroup.h |   8 ---
 mm/memcontrol.c             | 123 +++++++++++++++++++-------------------------
 2 files changed, 54 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 1153095ee457..a2d11771c84b 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -10,8 +10,6 @@ enum {
 	/* flags for mem_cgroup and file and I/O status */
 	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
 	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
-	/* No lock in page_cgroup */
-	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
 	__NR_PCG_FLAGS,
 };
 
@@ -75,12 +73,6 @@ TESTPCGFLAG(Used, USED)
 CLEARPCGFLAG(Used, USED)
 SETPCGFLAG(Used, USED)
 
-SETPCGFLAG(AcctLRU, ACCT_LRU)
-CLEARPCGFLAG(AcctLRU, ACCT_LRU)
-TESTPCGFLAG(AcctLRU, ACCT_LRU)
-TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU)
-
-
 SETPCGFLAG(FileMapped, FILE_MAPPED)
 CLEARPCGFLAG(FileMapped, FILE_MAPPED)
 TESTPCGFLAG(FileMapped, FILE_MAPPED)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c74102d6eb5a..ff051ee8fb4b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1040,30 +1040,7 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
 		return &zone->lruvec;
 
 	pc = lookup_page_cgroup(page);
-	VM_BUG_ON(PageCgroupAcctLRU(pc));
-	/*
-	 * putback:				charge:
-	 * SetPageLRU				SetPageCgroupUsed
-	 * smp_mb				smp_mb
-	 * PageCgroupUsed && add to memcg LRU	PageLRU && add to memcg LRU
-	 *
-	 * Ensure that one of the two sides adds the page to the memcg
-	 * LRU during a race.
-	 */
-	smp_mb();
-	/*
-	 * If the page is uncharged, it may be freed soon, but it
-	 * could also be swap cache (readahead, swapoff) that needs to
-	 * be reclaimable in the future.  root_mem_cgroup will babysit
-	 * it for the time being.
-	 */
-	if (PageCgroupUsed(pc)) {
-		/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-		smp_rmb();
-		memcg = pc->mem_cgroup;
-		SetPageCgroupAcctLRU(pc);
-	} else
-		memcg = root_mem_cgroup;
+	memcg = pc->mem_cgroup;
 	mz = page_cgroup_zoneinfo(memcg, page);
 	/* compound_order() is stabilized through lru_lock */
 	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
@@ -1090,18 +1067,8 @@ void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 		return;
 
 	pc = lookup_page_cgroup(page);
-	/*
-	 * root_mem_cgroup babysits uncharged LRU pages, but
-	 * PageCgroupUsed is cleared when the page is about to get
-	 * freed.  PageCgroupAcctLRU remembers whether the
-	 * LRU-accounting happened against pc->mem_cgroup or
-	 * root_mem_cgroup.
-	 */
-	if (TestClearPageCgroupAcctLRU(pc)) {
-		VM_BUG_ON(!pc->mem_cgroup);
-		memcg = pc->mem_cgroup;
-	} else
-		memcg = root_mem_cgroup;
+	memcg = pc->mem_cgroup;
+	VM_BUG_ON(!memcg);
 	mz = page_cgroup_zoneinfo(memcg, page);
 	/* huge page split is done under lru_lock. so, we have no races. */
 	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
@@ -2217,8 +2184,25 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 }
 
 /*
- * Unlike exported interface, "oom" parameter is added. if oom==true,
- * oom-killer can be invoked.
+ * __mem_cgroup_try_charge() does
+ * 1. detect memcg to be charged against from passed *mm and *ptr,
+ * 2. update res_counter
+ * 3. call memory reclaim if necessary.
+ *
+ * In some special case, if the task is fatal, fatal_signal_pending() or
+ * has TIF_MEMDIE, this function returns -EINTR while writing root_mem_cgroup
+ * to *ptr. There are two reasons for this. 1: fatal threads should quit as soon
+ * as possible without any hazards. 2: all pages should have a valid
+ * pc->mem_cgroup. If mm is NULL and the caller doesn't pass a valid memcg
+ * pointer, that is treated as a charge to root_mem_cgroup.
+ *
+ * So __mem_cgroup_try_charge() will return
+ *  0       ...  on success, filling *ptr with a valid memcg pointer.
+ *  -ENOMEM ...  charge failure because of resource limits.
+ *  -EINTR  ...  if thread is fatal. *ptr is filled with root_mem_cgroup.
+ *
+ * Unlike the exported interface, an "oom" parameter is added. if oom==true,
+ * the oom-killer can be invoked.
  */
 static int __mem_cgroup_try_charge(struct mm_struct *mm,
 				   gfp_t gfp_mask,
@@ -2247,7 +2231,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
 	if (!*ptr && !mm)
-		goto bypass;
+		*ptr = root_mem_cgroup;
 again:
 	if (*ptr) { /* css should be a valid one */
 		memcg = *ptr;
@@ -2273,7 +2257,9 @@ again:
 		 * task-struct. So, mm->owner can be NULL.
 		 */
 		memcg = mem_cgroup_from_task(p);
-		if (!memcg || mem_cgroup_is_root(memcg)) {
+		if (!memcg)
+			memcg = root_mem_cgroup;
+		if (mem_cgroup_is_root(memcg)) {
 			rcu_read_unlock();
 			goto done;
 		}
@@ -2348,8 +2334,8 @@ nomem:
 	*ptr = NULL;
 	return -ENOMEM;
 bypass:
-	*ptr = NULL;
-	return 0;
+	*ptr = root_mem_cgroup;
+	return -EINTR;
 }
 
 /*
@@ -2457,6 +2443,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 
 	mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
 	unlock_page_cgroup(pc);
+	WARN_ON_ONCE(PageLRU(page));
 	/*
 	 * "charge_statistics" updated event counter. Then, check it.
 	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
@@ -2468,7 +2455,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
 #define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
-			(1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
+			(1 << PCG_MIGRATION))
 /*
  * Because tail pages are not marked as "used", set it. We're under
  * zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -2478,7 +2465,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 void mem_cgroup_split_huge_fixup(struct page *head)
 {
 	struct page_cgroup *head_pc = lookup_page_cgroup(head);
+	struct mem_cgroup_per_zone *mz;
 	struct page_cgroup *pc;
+	enum lru_list lru;
 	int i;
 
 	if (mem_cgroup_disabled())
@@ -2487,23 +2476,15 @@ void mem_cgroup_split_huge_fixup(struct page *head)
 		pc = head_pc + i;
 		pc->mem_cgroup = head_pc->mem_cgroup;
 		smp_wmb();/* see __commit_charge() */
-		/*
-		 * LRU flags cannot be copied because we need to add tail
-		 * page to LRU by generic call and our hooks will be called.
-		 */
 		pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
 	}
-
-	if (PageCgroupAcctLRU(head_pc)) {
-		enum lru_list lru;
-		struct mem_cgroup_per_zone *mz;
-		/*
-		 * We hold lru_lock, then, reduce counter directly.
-		 */
-		lru = page_lru(head);
-		mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
-		MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
-	}
+	/*
+	 * Tail pages will be added to LRU.
+	 * We hold lru_lock,then,reduce counter directly.
+	 */
+	lru = page_lru(head);
+	mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
+	MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
 }
 #endif
 
@@ -2620,7 +2601,7 @@ static int mem_cgroup_move_parent(struct page *page,
 
 	parent = mem_cgroup_from_cont(pcg);
 	ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false);
-	if (ret || !parent)
+	if (ret)
 		goto put_back;
 
 	if (nr_pages > 1)
@@ -2667,9 +2648,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 
 	pc = lookup_page_cgroup(page);
 	ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
-	if (ret || !memcg)
+	if (ret == -ENOMEM)
 		return ret;
-
 	__mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
 	return 0;
 }
@@ -2736,10 +2716,9 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 	if (!page_is_file_cache(page))
 		type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
 
-	if (!PageSwapCache(page)) {
+	if (!PageSwapCache(page))
 		ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
-		WARN_ON_ONCE(PageLRU(page));
-	} else { /* page is swapcache/shmem */
+	else { /* page is swapcache/shmem */
 		ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg);
 		if (!ret)
 			__mem_cgroup_commit_charge_swapin(page, memcg, type);
@@ -2781,11 +2760,16 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 	*memcgp = memcg;
 	ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
 	css_put(&memcg->css);
+	if (ret == -EINTR)
+		ret = 0;
 	return ret;
 charge_cur_mm:
 	if (unlikely(!mm))
 		mm = &init_mm;
-	return __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
+	ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
+	if (ret == -EINTR)
+		ret = 0;
+	return ret;
 }
 
 static void
@@ -3245,7 +3229,7 @@ int mem_cgroup_prepare_migration(struct page *page,
 	*memcgp = memcg;
 	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
 	css_put(&memcg->css);/* drop extra refcnt */
-	if (ret || *memcgp == NULL) {
+	if (ret) {
 		if (PageAnon(page)) {
 			lock_page_cgroup(pc);
 			ClearPageCgroupMigration(pc);
@@ -3255,6 +3239,7 @@ int mem_cgroup_prepare_migration(struct page *page,
 			 */
 			mem_cgroup_uncharge_page(page);
 		}
+		/* we'll need to revisit this error code (we have -EINTR) */
 		return -ENOMEM;
 	}
 	/*
@@ -3674,7 +3659,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 		pc = lookup_page_cgroup(page);
 
 		ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
-		if (ret == -ENOMEM)
+		if (ret == -ENOMEM || ret == -EINTR)
 			break;
 
 		if (ret == -EBUSY || ret == -EINVAL) {
@@ -5065,9 +5050,9 @@ one_by_one:
 		}
 		ret = __mem_cgroup_try_charge(NULL,
 					GFP_KERNEL, 1, &memcg, false);
-		if (ret || !memcg)
+		if (ret)
 			/* mem_cgroup_clear_mc() will do uncharge later */
-			return -ENOMEM;
+			return ret;
 		mc.precharge++;
 	}
 	return ret;
-- 
cgit v1.2.3


From f21760b15dcd091e5afd38d0b97197b45f7ef2ea Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Thu, 12 Jan 2012 17:19:16 -0800
Subject: thp: add tlb_remove_pmd_tlb_entry

We have tlb_remove_tlb_entry() to indicate that the TLB entry for a pte
should be flushed, but no corresponding API for a pmd entry.  This isn't
a problem so far because THP is currently x86-only and tlb_flush() on
x86 flushes the entire TLB.  But this is confusing, and the flush could
be missed if THP is ported to another architecture.

Also convert tlb->need_flush = 1 to a VM_BUG_ON(!tlb->need_flush) in
__tlb_remove_page() as suggested by Andrea Arcangeli.  The
__tlb_remove_page() function is supposed to be called after
tlb_remove_xxx_tlb_entry() and we can catch any misuse.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/tlb.h | 14 ++++++++++++++
 include/linux/huge_mm.h   |  2 +-
 mm/huge_memory.c          |  3 ++-
 mm/memory.c               |  4 ++--
 4 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index e58fa777fa09..f96a5b58a975 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -139,6 +139,20 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 		__tlb_remove_tlb_entry(tlb, ptep, address);	\
 	} while (0)
 
+/**
+ * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation
+ * This is a nop so far, because only x86 needs it.
+ */
+#ifndef __tlb_remove_pmd_tlb_entry
+#define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0)
+#endif
+
+#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)		\
+	do {							\
+		tlb->need_flush = 1;				\
+		__tlb_remove_pmd_tlb_entry(tlb, pmdp, address);	\
+	} while (0)
+
 #define pte_free_tlb(tlb, ptep, address)			\
 	do {							\
 		tlb->need_flush = 1;				\
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a9ace9c32507..1b921299abc4 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -18,7 +18,7 @@ extern struct page *follow_trans_huge_pmd(struct mm_struct *mm,
 					  unsigned int flags);
 extern int zap_huge_pmd(struct mmu_gather *tlb,
 			struct vm_area_struct *vma,
-			pmd_t *pmd);
+			pmd_t *pmd, unsigned long addr);
 extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, unsigned long end,
 			unsigned char *vec);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 964fc5a2edd2..5a595554bd8c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1026,7 +1026,7 @@ out:
 }
 
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
-		 pmd_t *pmd)
+		 pmd_t *pmd, unsigned long addr)
 {
 	int ret = 0;
 
@@ -1042,6 +1042,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			pgtable = get_pmd_huge_pte(tlb->mm);
 			page = pmd_page(*pmd);
 			pmd_clear(pmd);
+			tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
 			page_remove_rmap(page);
 			VM_BUG_ON(page_mapcount(page) < 0);
 			add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
diff --git a/mm/memory.c b/mm/memory.c
index 829d43735402..5e30583c2605 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -293,7 +293,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
 	struct mmu_gather_batch *batch;
 
-	tlb->need_flush = 1;
+	VM_BUG_ON(!tlb->need_flush);
 
 	if (tlb_fast_mode(tlb)) {
 		free_page_and_swap_cache(page);
@@ -1231,7 +1231,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 			if (next-addr != HPAGE_PMD_SIZE) {
 				VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
 				split_huge_page_pmd(vma->vm_mm, pmd);
-			} else if (zap_huge_pmd(tlb, vma, pmd))
+			} else if (zap_huge_pmd(tlb, vma, pmd, addr))
 				continue;
 			/* fall through */
 		}
-- 
cgit v1.2.3


From b969c4ab9f182a6e1b2a0848be349f99714947b0 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 12 Jan 2012 17:19:34 -0800
Subject: mm: compaction: determine if dirty pages can be migrated without
 blocking within ->migratepage

Asynchronous compaction is used when allocating transparent hugepages to
avoid blocking for long periods of time.  Due to reports of stalling,
there was a debate on disabling synchronous compaction but this severely
impacted allocation success rates.  Part of the reason was that many dirty
pages are skipped in asynchronous compaction by the following check;

	if (PageDirty(page) && !sync &&
		mapping->a_ops->migratepage != migrate_page)
			rc = -EBUSY;

This skips over all mapping aops using buffer_migrate_page() even though
it is possible to migrate some of these pages without blocking.  This
patch updates the ->migratepage callback with a "sync" parameter.  It is
the responsibility of the callback to fail gracefully if migration would
block.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andy Isaacson <adi@hexapodia.org>
Cc: Nai Xia <nai.xia@gmail.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/btrfs/disk-io.c      |   4 +-
 fs/hugetlbfs/inode.c    |   3 +-
 fs/nfs/internal.h       |   2 +-
 fs/nfs/write.c          |   4 +-
 include/linux/fs.h      |   9 ++--
 include/linux/migrate.h |   2 +-
 mm/migrate.c            | 129 ++++++++++++++++++++++++++++++++++--------------
 7 files changed, 106 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f99a099a7747..1375494c8cb6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -872,7 +872,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
 #ifdef CONFIG_MIGRATION
 static int btree_migratepage(struct address_space *mapping,
-			struct page *newpage, struct page *page)
+			struct page *newpage, struct page *page, bool sync)
 {
 	/*
 	 * we can't safely write a btree page from here,
@@ -887,7 +887,7 @@ static int btree_migratepage(struct address_space *mapping,
 	if (page_has_private(page) &&
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
 #endif
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e425ad9d0490..06fd4608a990 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -583,7 +583,8 @@ static int hugetlbfs_set_page_dirty(struct page *page)
 }
 
 static int hugetlbfs_migrate_page(struct address_space *mapping,
-				struct page *newpage, struct page *page)
+				struct page *newpage, struct page *page,
+				bool sync)
 {
 	int rc;
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5ee92538b063..114398a15830 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -332,7 +332,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data);
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
-		struct page *, struct page *);
+		struct page *, struct page *, bool);
 #else
 #define nfs_migrate_page NULL
 #endif
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0c3885255f97..889e98bc5a21 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1688,7 +1688,7 @@ out_error:
 
 #ifdef CONFIG_MIGRATION
 int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
-		struct page *page)
+		struct page *page, bool sync)
 {
 	/*
 	 * If PagePrivate is set, then the page is currently associated with
@@ -1703,7 +1703,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 
 	nfs_fscache_release_page(page, GFP_KERNEL);
 
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
 #endif
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a7409bc157e0..b92b73d0b2b9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -609,9 +609,12 @@ struct address_space_operations {
 			loff_t offset, unsigned long nr_segs);
 	int (*get_xip_mem)(struct address_space *, pgoff_t, int,
 						void **, unsigned long *);
-	/* migrate the contents of a page to the specified target */
+	/*
+	 * migrate the contents of a page to the specified target. If sync
+	 * is false, it must not block.
+	 */
 	int (*migratepage) (struct address_space *,
-			struct page *, struct page *);
+			struct page *, struct page *, bool);
 	int (*launder_page) (struct page *);
 	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
 					unsigned long);
@@ -2537,7 +2540,7 @@ extern int generic_check_addressable(unsigned, u64);
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
-				struct page *, struct page *);
+				struct page *, struct page *, bool);
 #else
 #define buffer_migrate_page NULL
 #endif
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index e39aeecfe9a2..14e6d2a88475 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -11,7 +11,7 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
 extern void putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
-			struct page *, struct page *);
+			struct page *, struct page *, bool);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
 			bool sync);
diff --git a/mm/migrate.c b/mm/migrate.c
index fc391985899f..4e86f3bacb85 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -216,6 +216,55 @@ out:
 	pte_unmap_unlock(ptep, ptl);
 }
 
+#ifdef CONFIG_BLOCK
+/* Returns true if all buffers are successfully locked */
+static bool buffer_migrate_lock_buffers(struct buffer_head *head, bool sync)
+{
+	struct buffer_head *bh = head;
+
+	/* Simple case, sync compaction */
+	if (sync) {
+		do {
+			get_bh(bh);
+			lock_buffer(bh);
+			bh = bh->b_this_page;
+
+		} while (bh != head);
+
+		return true;
+	}
+
+	/* async case, we cannot block on lock_buffer so use trylock_buffer */
+	do {
+		get_bh(bh);
+		if (!trylock_buffer(bh)) {
+			/*
+			 * We failed to lock the buffer and cannot stall in
+			 * async migration. Release the taken locks
+			 */
+			struct buffer_head *failed_bh = bh;
+			put_bh(failed_bh);
+			bh = head;
+			while (bh != failed_bh) {
+				unlock_buffer(bh);
+				put_bh(bh);
+				bh = bh->b_this_page;
+			}
+			return false;
+		}
+
+		bh = bh->b_this_page;
+	} while (bh != head);
+	return true;
+}
+#else
+static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
+								bool sync)
+{
+	return true;
+}
+#endif /* CONFIG_BLOCK */
+
 /*
  * Replace the page in the mapping.
  *
@@ -225,7 +274,8 @@ out:
  * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
  */
 static int migrate_page_move_mapping(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page,
+		struct buffer_head *head, bool sync)
 {
 	int expected_count;
 	void **pslot;
@@ -254,6 +304,19 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 		return -EAGAIN;
 	}
 
+	/*
+	 * In the async migration case of moving a page with buffers, lock the
+	 * buffers using trylock before the mapping is moved. If the mapping
+	 * was moved, we later failed to lock the buffers and could not move
+	 * the mapping back due to an elevated page count, we would have to
+	 * block waiting on other references to be dropped.
+	 */
+	if (!sync && head && !buffer_migrate_lock_buffers(head, sync)) {
+		page_unfreeze_refs(page, expected_count);
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
 	/*
 	 * Now we know that no one else is looking at the page.
 	 */
@@ -409,13 +472,13 @@ EXPORT_SYMBOL(fail_migrate_page);
  * Pages are locked upon entry and exit.
  */
 int migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page, bool sync)
 {
 	int rc;
 
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	rc = migrate_page_move_mapping(mapping, newpage, page);
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, sync);
 
 	if (rc)
 		return rc;
@@ -432,28 +495,28 @@ EXPORT_SYMBOL(migrate_page);
  * exist.
  */
 int buffer_migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page, bool sync)
 {
 	struct buffer_head *bh, *head;
 	int rc;
 
 	if (!page_has_buffers(page))
-		return migrate_page(mapping, newpage, page);
+		return migrate_page(mapping, newpage, page, sync);
 
 	head = page_buffers(page);
 
-	rc = migrate_page_move_mapping(mapping, newpage, page);
+	rc = migrate_page_move_mapping(mapping, newpage, page, head, sync);
 
 	if (rc)
 		return rc;
 
-	bh = head;
-	do {
-		get_bh(bh);
-		lock_buffer(bh);
-		bh = bh->b_this_page;
-
-	} while (bh != head);
+	/*
+	 * In the async case, migrate_page_move_mapping locked the buffers
+	 * with an IRQ-safe spinlock held. In the sync case, the buffers
+	 * need to be locked now
+	 */
+	if (sync)
+		BUG_ON(!buffer_migrate_lock_buffers(head, sync));
 
 	ClearPagePrivate(page);
 	set_page_private(newpage, page_private(page));
@@ -530,10 +593,13 @@ static int writeout(struct address_space *mapping, struct page *page)
  * Default handling if a filesystem does not provide a migration function.
  */
 static int fallback_migrate_page(struct address_space *mapping,
-	struct page *newpage, struct page *page)
+	struct page *newpage, struct page *page, bool sync)
 {
-	if (PageDirty(page))
+	if (PageDirty(page)) {
+		if (!sync)
+			return -EBUSY;
 		return writeout(mapping, page);
+	}
 
 	/*
 	 * Buffers may be managed in a filesystem specific way.
@@ -543,7 +609,7 @@ static int fallback_migrate_page(struct address_space *mapping,
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
 
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, sync);
 }
 
 /*
@@ -579,29 +645,18 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 
 	mapping = page_mapping(page);
 	if (!mapping)
-		rc = migrate_page(mapping, newpage, page);
-	else {
+		rc = migrate_page(mapping, newpage, page, sync);
+	else if (mapping->a_ops->migratepage)
 		/*
-		 * Do not writeback pages if !sync and migratepage is
-		 * not pointing to migrate_page() which is nonblocking
-		 * (swapcache/tmpfs uses migratepage = migrate_page).
+		 * Most pages have a mapping and most filesystems provide a
+		 * migratepage callback. Anonymous pages are part of swap
+		 * space which also has its own migratepage callback. This
+		 * is the most common path for page migration.
 		 */
-		if (PageDirty(page) && !sync &&
-		    mapping->a_ops->migratepage != migrate_page)
-			rc = -EBUSY;
-		else if (mapping->a_ops->migratepage)
-			/*
-			 * Most pages have a mapping and most filesystems
-			 * should provide a migration function. Anonymous
-			 * pages are part of swap space which also has its
-			 * own migration function. This is the most common
-			 * path for page migration.
-			 */
-			rc = mapping->a_ops->migratepage(mapping,
-							newpage, page);
-		else
-			rc = fallback_migrate_page(mapping, newpage, page);
-	}
+		rc = mapping->a_ops->migratepage(mapping,
+						newpage, page, sync);
+	else
+		rc = fallback_migrate_page(mapping, newpage, page, sync);
 
 	if (rc) {
 		newpage->mapping = NULL;
-- 
cgit v1.2.3


From c82449352854ff09e43062246af86bdeb628f0c3 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 12 Jan 2012 17:19:38 -0800
Subject: mm: compaction: make isolate_lru_page() filter-aware again

Commit 39deaf85 ("mm: compaction: make isolate_lru_page() filter-aware")
noted that compaction does not migrate dirty or writeback pages and that
it was meaningless to pick the page and re-add it to the LRU list.  This
had to be partially reverted because some dirty pages can be migrated by
compaction without blocking.

This patch updates "mm: compaction: make isolate_lru_page() filter-aware"
by skipping over pages that migration has no possibility of migrating, to
minimise LRU disruption.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Cc: Dave Jones <davej@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andy Isaacson <adi@hexapodia.org>
Cc: Nai Xia <nai.xia@gmail.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  2 ++
 mm/compaction.c        |  3 +++
 mm/vmscan.c            | 35 +++++++++++++++++++++++++++++++++--
 3 files changed, 38 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 42e544cd4c9f..2038b90ca6e3 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -177,6 +177,8 @@ struct lruvec {
 #define ISOLATE_CLEAN		((__force isolate_mode_t)0x4)
 /* Isolate unmapped file */
 #define ISOLATE_UNMAPPED	((__force isolate_mode_t)0x8)
+/* Isolate for asynchronous migration */
+#define ISOLATE_ASYNC_MIGRATE	((__force isolate_mode_t)0x10)
 
 /* LRU Isolation modes. */
 typedef unsigned __bitwise__ isolate_mode_t;
diff --git a/mm/compaction.c b/mm/compaction.c
index d31e64becb38..fb291585e1bf 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -349,6 +349,9 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 			continue;
 		}
 
+		if (!cc->sync)
+			mode |= ISOLATE_ASYNC_MIGRATE;
+
 		/* Try isolate the page */
 		if (__isolate_lru_page(page, mode, 0) != 0)
 			continue;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cb68c53db4ec..efbcab1c8f54 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1075,8 +1075,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
 
 	ret = -EBUSY;
 
-	if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page)))
-		return ret;
+	/*
+	 * To minimise LRU disruption, the caller can indicate that it only
+	 * wants to isolate pages it will be able to operate on without
+	 * blocking - clean pages for the most part.
+	 *
+	 * ISOLATE_CLEAN means that only clean pages should be isolated. This
+	 * is used by reclaim when it is cannot write to backing storage
+	 *
+	 * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages
+	 * that it is possible to migrate without blocking
+	 */
+	if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
+		/* All the caller can do on PageWriteback is block */
+		if (PageWriteback(page))
+			return ret;
+
+		if (PageDirty(page)) {
+			struct address_space *mapping;
+
+			/* ISOLATE_CLEAN means only clean pages */
+			if (mode & ISOLATE_CLEAN)
+				return ret;
+
+			/*
+			 * Only pages without mappings or that have a
+			 * ->migratepage callback are possible to migrate
+			 * without blocking
+			 */
+			mapping = page_mapping(page);
+			if (mapping && !mapping->a_ops->migratepage)
+				return ret;
+		}
+	}
 
 	if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
 		return ret;
-- 
cgit v1.2.3


From a6bc32b899223a877f595ef9ddc1e89ead5072b8 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 12 Jan 2012 17:19:43 -0800
Subject: mm: compaction: introduce sync-light migration for use by compaction

This patch adds a lightweight sync migration mode, MIGRATE_SYNC_LIGHT,
that avoids writing back pages to backing storage.  Async compaction
maps to MIGRATE_ASYNC while sync compaction maps to MIGRATE_SYNC_LIGHT.
For other migrate_pages users such as memory hotplug, MIGRATE_SYNC is
used.

This avoids sync compaction stalling for an excessive length of time,
particularly when copying files to a USB stick where there might be a
large number of dirty pages backed by a filesystem that does not support
->writepages.

[aarcange@redhat.com: This patch is heavily based on Andrea's work]
[akpm@linux-foundation.org: fix fs/nfs/write.c build]
[akpm@linux-foundation.org: fix fs/btrfs/disk-io.c build]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andy Isaacson <adi@hexapodia.org>
Cc: Nai Xia <nai.xia@gmail.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/btrfs/disk-io.c      |  5 ++--
 fs/hugetlbfs/inode.c    |  2 +-
 fs/nfs/internal.h       |  2 +-
 fs/nfs/write.c          |  4 +--
 include/linux/fs.h      |  6 ++--
 include/linux/migrate.h | 23 +++++++++++----
 mm/compaction.c         |  2 +-
 mm/memory-failure.c     |  2 +-
 mm/memory_hotplug.c     |  2 +-
 mm/mempolicy.c          |  2 +-
 mm/migrate.c            | 78 +++++++++++++++++++++++++++----------------------
 11 files changed, 76 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1375494c8cb6..d8525662ca7a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -872,7 +872,8 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
 #ifdef CONFIG_MIGRATION
 static int btree_migratepage(struct address_space *mapping,
-			struct page *newpage, struct page *page, bool sync)
+			struct page *newpage, struct page *page,
+			enum migrate_mode mode)
 {
 	/*
 	 * we can't safely write a btree page from here,
@@ -887,7 +888,7 @@ static int btree_migratepage(struct address_space *mapping,
 	if (page_has_private(page) &&
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
-	return migrate_page(mapping, newpage, page, sync);
+	return migrate_page(mapping, newpage, page, mode);
 }
 #endif
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 06fd4608a990..1e85a7ac0217 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -584,7 +584,7 @@ static int hugetlbfs_set_page_dirty(struct page *page)
 
 static int hugetlbfs_migrate_page(struct address_space *mapping,
 				struct page *newpage, struct page *page,
-				bool sync)
+				enum migrate_mode mode)
 {
 	int rc;
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 114398a15830..8102db9b926c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -332,7 +332,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data);
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
-		struct page *, struct page *, bool);
+		struct page *, struct page *, enum migrate_mode);
 #else
 #define nfs_migrate_page NULL
 #endif
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 889e98bc5a21..834f0fe96f89 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1688,7 +1688,7 @@ out_error:
 
 #ifdef CONFIG_MIGRATION
 int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
-		struct page *page, bool sync)
+		struct page *page, enum migrate_mode mode)
 {
 	/*
 	 * If PagePrivate is set, then the page is currently associated with
@@ -1703,7 +1703,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 
 	nfs_fscache_release_page(page, GFP_KERNEL);
 
-	return migrate_page(mapping, newpage, page, sync);
+	return migrate_page(mapping, newpage, page, mode);
 }
 #endif
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b92b73d0b2b9..e694bd4434a4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -525,6 +525,7 @@ enum positive_aop_returns {
 struct page;
 struct address_space;
 struct writeback_control;
+enum migrate_mode;
 
 struct iov_iter {
 	const struct iovec *iov;
@@ -614,7 +615,7 @@ struct address_space_operations {
 	 * is false, it must not block.
 	 */
 	int (*migratepage) (struct address_space *,
-			struct page *, struct page *, bool);
+			struct page *, struct page *, enum migrate_mode);
 	int (*launder_page) (struct page *);
 	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
 					unsigned long);
@@ -2540,7 +2541,8 @@ extern int generic_check_addressable(unsigned, u64);
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
-				struct page *, struct page *, bool);
+				struct page *, struct page *,
+				enum migrate_mode);
 #else
 #define buffer_migrate_page NULL
 #endif
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 14e6d2a88475..eaf867412f7a 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -6,18 +6,31 @@
 
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
+/*
+ * MIGRATE_ASYNC means never block
+ * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking
+ *	on most operations but not ->writepage as the potential stall time
+ *	is too significant
+ * MIGRATE_SYNC will block when migrating pages
+ */
+enum migrate_mode {
+	MIGRATE_ASYNC,
+	MIGRATE_SYNC_LIGHT,
+	MIGRATE_SYNC,
+};
+
 #ifdef CONFIG_MIGRATION
 #define PAGE_MIGRATION 1
 
 extern void putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
-			struct page *, struct page *, bool);
+			struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
-			bool sync);
+			enum migrate_mode mode);
 extern int migrate_huge_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
-			bool sync);
+			enum migrate_mode mode);
 
 extern int fail_migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -36,10 +49,10 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 static inline void putback_lru_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
-		bool sync) { return -ENOSYS; }
+		enum migrate_mode mode) { return -ENOSYS; }
 static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
-		bool sync) { return -ENOSYS; }
+		enum migrate_mode mode) { return -ENOSYS; }
 
 static inline int migrate_prep(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }
diff --git a/mm/compaction.c b/mm/compaction.c
index fb291585e1bf..71a58f67f481 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -557,7 +557,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 		nr_migrate = cc->nr_migratepages;
 		err = migrate_pages(&cc->migratepages, compaction_alloc,
 				(unsigned long)cc, false,
-				cc->sync);
+				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
 		update_nr_listpages(cc);
 		nr_remaining = cc->nr_migratepages;
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 06d3479513aa..56080ea36140 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1557,7 +1557,7 @@ int soft_offline_page(struct page *page, int flags)
 					    page_is_file_cache(page));
 		list_add(&page->lru, &pagelist);
 		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
-								0, true);
+							0, MIGRATE_SYNC);
 		if (ret) {
 			putback_lru_pages(&pagelist);
 			pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2168489c0bc9..6629fafd6ce4 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -809,7 +809,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		}
 		/* this function returns # of failed pages */
 		ret = migrate_pages(&source, hotremove_migrate_alloc, 0,
-								true, true);
+							true, MIGRATE_SYNC);
 		if (ret)
 			putback_lru_pages(&source);
 	}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e3d58f088466..06b145fb64ab 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -942,7 +942,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_node_page, dest,
-								false, true);
+							false, MIGRATE_SYNC);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
diff --git a/mm/migrate.c b/mm/migrate.c
index 4e86f3bacb85..9871a56d82c3 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -218,12 +218,13 @@ out:
 
 #ifdef CONFIG_BLOCK
 /* Returns true if all buffers are successfully locked */
-static bool buffer_migrate_lock_buffers(struct buffer_head *head, bool sync)
+static bool buffer_migrate_lock_buffers(struct buffer_head *head,
+							enum migrate_mode mode)
 {
 	struct buffer_head *bh = head;
 
 	/* Simple case, sync compaction */
-	if (sync) {
+	if (mode != MIGRATE_ASYNC) {
 		do {
 			get_bh(bh);
 			lock_buffer(bh);
@@ -259,7 +260,7 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head, bool sync)
 }
 #else
 static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
-								bool sync)
+							enum migrate_mode mode)
 {
 	return true;
 }
@@ -275,7 +276,7 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
  */
 static int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page,
-		struct buffer_head *head, bool sync)
+		struct buffer_head *head, enum migrate_mode mode)
 {
 	int expected_count;
 	void **pslot;
@@ -311,7 +312,8 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	 * the mapping back due to an elevated page count, we would have to
 	 * block waiting on other references to be dropped.
 	 */
-	if (!sync && head && !buffer_migrate_lock_buffers(head, sync)) {
+	if (mode == MIGRATE_ASYNC && head &&
+			!buffer_migrate_lock_buffers(head, mode)) {
 		page_unfreeze_refs(page, expected_count);
 		spin_unlock_irq(&mapping->tree_lock);
 		return -EAGAIN;
@@ -472,13 +474,14 @@ EXPORT_SYMBOL(fail_migrate_page);
  * Pages are locked upon entry and exit.
  */
 int migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page, bool sync)
+		struct page *newpage, struct page *page,
+		enum migrate_mode mode)
 {
 	int rc;
 
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, sync);
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
 
 	if (rc)
 		return rc;
@@ -495,17 +498,17 @@ EXPORT_SYMBOL(migrate_page);
  * exist.
  */
 int buffer_migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page, bool sync)
+		struct page *newpage, struct page *page, enum migrate_mode mode)
 {
 	struct buffer_head *bh, *head;
 	int rc;
 
 	if (!page_has_buffers(page))
-		return migrate_page(mapping, newpage, page, sync);
+		return migrate_page(mapping, newpage, page, mode);
 
 	head = page_buffers(page);
 
-	rc = migrate_page_move_mapping(mapping, newpage, page, head, sync);
+	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
 
 	if (rc)
 		return rc;
@@ -515,8 +518,8 @@ int buffer_migrate_page(struct address_space *mapping,
 	 * with an IRQ-safe spinlock held. In the sync case, the buffers
 	 * need to be locked now
 	 */
-	if (sync)
-		BUG_ON(!buffer_migrate_lock_buffers(head, sync));
+	if (mode != MIGRATE_ASYNC)
+		BUG_ON(!buffer_migrate_lock_buffers(head, mode));
 
 	ClearPagePrivate(page);
 	set_page_private(newpage, page_private(page));
@@ -593,10 +596,11 @@ static int writeout(struct address_space *mapping, struct page *page)
  * Default handling if a filesystem does not provide a migration function.
  */
 static int fallback_migrate_page(struct address_space *mapping,
-	struct page *newpage, struct page *page, bool sync)
+	struct page *newpage, struct page *page, enum migrate_mode mode)
 {
 	if (PageDirty(page)) {
-		if (!sync)
+		/* Only writeback pages in full synchronous migration */
+		if (mode != MIGRATE_SYNC)
 			return -EBUSY;
 		return writeout(mapping, page);
 	}
@@ -609,7 +613,7 @@ static int fallback_migrate_page(struct address_space *mapping,
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
 
-	return migrate_page(mapping, newpage, page, sync);
+	return migrate_page(mapping, newpage, page, mode);
 }
 
 /*
@@ -624,7 +628,7 @@ static int fallback_migrate_page(struct address_space *mapping,
  *  == 0 - success
  */
 static int move_to_new_page(struct page *newpage, struct page *page,
-					int remap_swapcache, bool sync)
+				int remap_swapcache, enum migrate_mode mode)
 {
 	struct address_space *mapping;
 	int rc;
@@ -645,7 +649,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 
 	mapping = page_mapping(page);
 	if (!mapping)
-		rc = migrate_page(mapping, newpage, page, sync);
+		rc = migrate_page(mapping, newpage, page, mode);
 	else if (mapping->a_ops->migratepage)
 		/*
 		 * Most pages have a mapping and most filesystems provide a
@@ -654,9 +658,9 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 		 * is the most common path for page migration.
 		 */
 		rc = mapping->a_ops->migratepage(mapping,
-						newpage, page, sync);
+						newpage, page, mode);
 	else
-		rc = fallback_migrate_page(mapping, newpage, page, sync);
+		rc = fallback_migrate_page(mapping, newpage, page, mode);
 
 	if (rc) {
 		newpage->mapping = NULL;
@@ -671,7 +675,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 }
 
 static int __unmap_and_move(struct page *page, struct page *newpage,
-				int force, bool offlining, bool sync)
+			int force, bool offlining, enum migrate_mode mode)
 {
 	int rc = -EAGAIN;
 	int remap_swapcache = 1;
@@ -680,7 +684,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 	struct anon_vma *anon_vma = NULL;
 
 	if (!trylock_page(page)) {
-		if (!force || !sync)
+		if (!force || mode == MIGRATE_ASYNC)
 			goto out;
 
 		/*
@@ -726,10 +730,12 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 
 	if (PageWriteback(page)) {
 		/*
-		 * For !sync, there is no point retrying as the retry loop
-		 * is expected to be too short for PageWriteback to be cleared
+		 * Only in the case of a full syncronous migration is it
+		 * necessary to wait for PageWriteback. In the async case,
+		 * the retry loop is too short and in the sync-light case,
+		 * the overhead of stalling is too much
 		 */
-		if (!sync) {
+		if (mode != MIGRATE_SYNC) {
 			rc = -EBUSY;
 			goto uncharge;
 		}
@@ -800,7 +806,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 
 skip_unmap:
 	if (!page_mapped(page))
-		rc = move_to_new_page(newpage, page, remap_swapcache, sync);
+		rc = move_to_new_page(newpage, page, remap_swapcache, mode);
 
 	if (rc && remap_swapcache)
 		remove_migration_ptes(page, page);
@@ -823,7 +829,8 @@ out:
  * to the newly allocated page in newpage.
  */
 static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force, bool offlining, bool sync)
+			struct page *page, int force, bool offlining,
+			enum migrate_mode mode)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -843,7 +850,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		if (unlikely(split_huge_page(page)))
 			goto out;
 
-	rc = __unmap_and_move(page, newpage, force, offlining, sync);
+	rc = __unmap_and_move(page, newpage, force, offlining, mode);
 out:
 	if (rc != -EAGAIN) {
 		/*
@@ -891,7 +898,8 @@ out:
  */
 static int unmap_and_move_huge_page(new_page_t get_new_page,
 				unsigned long private, struct page *hpage,
-				int force, bool offlining, bool sync)
+				int force, bool offlining,
+				enum migrate_mode mode)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -904,7 +912,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	rc = -EAGAIN;
 
 	if (!trylock_page(hpage)) {
-		if (!force || !sync)
+		if (!force || mode != MIGRATE_SYNC)
 			goto out;
 		lock_page(hpage);
 	}
@@ -915,7 +923,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
 
 	if (!page_mapped(hpage))
-		rc = move_to_new_page(new_hpage, hpage, 1, sync);
+		rc = move_to_new_page(new_hpage, hpage, 1, mode);
 
 	if (rc)
 		remove_migration_ptes(hpage, hpage);
@@ -958,7 +966,7 @@ out:
  */
 int migrate_pages(struct list_head *from,
 		new_page_t get_new_page, unsigned long private, bool offlining,
-		bool sync)
+		enum migrate_mode mode)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -979,7 +987,7 @@ int migrate_pages(struct list_head *from,
 
 			rc = unmap_and_move(get_new_page, private,
 						page, pass > 2, offlining,
-						sync);
+						mode);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -1009,7 +1017,7 @@ out:
 
 int migrate_huge_pages(struct list_head *from,
 		new_page_t get_new_page, unsigned long private, bool offlining,
-		bool sync)
+		enum migrate_mode mode)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -1026,7 +1034,7 @@ int migrate_huge_pages(struct list_head *from,
 
 			rc = unmap_and_move_huge_page(get_new_page,
 					private, page, pass > 2, offlining,
-					sync);
+					mode);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -1155,7 +1163,7 @@ set_status:
 	err = 0;
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm, 0, true);
+				(unsigned long)pm, 0, MIGRATE_SYNC);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
-- 
cgit v1.2.3


From 2bcf887963812c075f80a14e1fad8ec7e1c67acf Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 12 Jan 2012 17:19:56 -0800
Subject: mm: take pagevecs off reclaim stack

Replace pagevecs in putback_lru_pages() and move_active_pages_to_lru()
by lists of pages_to_free: then apply Konstantin Khlebnikov's
free_hot_cold_page_list() to them instead of pagevec_release().

Which simplifies the flow (no need to drop and retake lock whenever
pagevec fills up) and reduces stale addresses in stack backtraces
(which often showed through the pagevecs); but more importantly,
removes another 120 bytes from the deepest stacks in page reclaim.
Although I've not recently seen an actual stack overflow here with
a vanilla kernel, move_active_pages_to_lru() has often featured in
deep backtraces.

However, free_hot_cold_page_list() does not handle compound pages
(nor need it: a Transparent HugePage would have been split by the
time it reaches the call in shrink_page_list()), but it is possible
for putback_lru_pages() or move_active_pages_to_lru() to be left
holding the last reference on a THP, so must exclude the unlikely
compound case before putting on pages_to_free.

Remove pagevec_strip(), its work now done in move_active_pages_to_lru().
The pagevec in scan_mapping_unevictable_pages() remains in mm/vmscan.c,
but that is never on the reclaim path, and cannot be replaced by a list.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagevec.h |  2 --
 mm/swap.c               | 19 ----------------
 mm/vmscan.c             | 58 ++++++++++++++++++++++++++++++++++---------------
 3 files changed, 40 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index ed17024d2ebe..9def9121f8a2 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -22,7 +22,6 @@ struct pagevec {
 
 void __pagevec_release(struct pagevec *pvec);
 void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
-void pagevec_strip(struct pagevec *pvec);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
 		pgoff_t start, unsigned nr_pages);
 unsigned pagevec_lookup_tag(struct pagevec *pvec,
@@ -59,7 +58,6 @@ static inline unsigned pagevec_add(struct pagevec *pvec, struct page *page)
 	return pagevec_space(pvec);
 }
 
-
 static inline void pagevec_release(struct pagevec *pvec)
 {
 	if (pagevec_count(pvec))
diff --git a/mm/swap.c b/mm/swap.c
index db6defaf2e55..79c22a649a3e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -23,7 +23,6 @@
 #include <linux/init.h>
 #include <linux/export.h>
 #include <linux/mm_inline.h>
-#include <linux/buffer_head.h>	/* for try_to_release_page() */
 #include <linux/percpu_counter.h>
 #include <linux/percpu.h>
 #include <linux/cpu.h>
@@ -730,24 +729,6 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 
 EXPORT_SYMBOL(____pagevec_lru_add);
 
-/*
- * Try to drop buffers from the pages in a pagevec
- */
-void pagevec_strip(struct pagevec *pvec)
-{
-	int i;
-
-	for (i = 0; i < pagevec_count(pvec); i++) {
-		struct page *page = pvec->pages[i];
-
-		if (page_has_private(page) && trylock_page(page)) {
-			if (page_has_private(page))
-				try_to_release_page(page, 0);
-			unlock_page(page);
-		}
-	}
-}
-
 /**
  * pagevec_lookup - gang pagecache lookup
  * @pvec:	Where the resulting pages are placed
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 25f90383b391..7724fb8e7498 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1398,12 +1398,10 @@ putback_lru_pages(struct mem_cgroup_zone *mz, struct scan_control *sc,
 		  struct list_head *page_list)
 {
 	struct page *page;
-	struct pagevec pvec;
+	LIST_HEAD(pages_to_free);
 	struct zone *zone = mz->zone;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 
-	pagevec_init(&pvec, 1);
-
 	/*
 	 * Put back any unfreeable pages.
 	 */
@@ -1427,17 +1425,24 @@ putback_lru_pages(struct mem_cgroup_zone *mz, struct scan_control *sc,
 			int numpages = hpage_nr_pages(page);
 			reclaim_stat->recent_rotated[file] += numpages;
 		}
-		if (!pagevec_add(&pvec, page)) {
-			spin_unlock_irq(&zone->lru_lock);
-			__pagevec_release(&pvec);
-			spin_lock_irq(&zone->lru_lock);
+		if (put_page_testzero(page)) {
+			__ClearPageLRU(page);
+			__ClearPageActive(page);
+			del_page_from_lru_list(zone, page, lru);
+
+			if (unlikely(PageCompound(page))) {
+				spin_unlock_irq(&zone->lru_lock);
+				(*get_compound_page_dtor(page))(page);
+				spin_lock_irq(&zone->lru_lock);
+			} else
+				list_add(&page->lru, &pages_to_free);
 		}
 	}
 	__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
 	__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
 
 	spin_unlock_irq(&zone->lru_lock);
-	pagevec_release(&pvec);
+	free_hot_cold_page_list(&pages_to_free, 1);
 }
 
 static noinline_for_stack void
@@ -1647,13 +1652,23 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 
 static void move_active_pages_to_lru(struct zone *zone,
 				     struct list_head *list,
+				     struct list_head *pages_to_free,
 				     enum lru_list lru)
 {
 	unsigned long pgmoved = 0;
-	struct pagevec pvec;
 	struct page *page;
 
-	pagevec_init(&pvec, 1);
+	if (buffer_heads_over_limit) {
+		spin_unlock_irq(&zone->lru_lock);
+		list_for_each_entry(page, list, lru) {
+			if (page_has_private(page) && trylock_page(page)) {
+				if (page_has_private(page))
+					try_to_release_page(page, 0);
+				unlock_page(page);
+			}
+		}
+		spin_lock_irq(&zone->lru_lock);
+	}
 
 	while (!list_empty(list)) {
 		struct lruvec *lruvec;
@@ -1667,12 +1682,17 @@ static void move_active_pages_to_lru(struct zone *zone,
 		list_move(&page->lru, &lruvec->lists[lru]);
 		pgmoved += hpage_nr_pages(page);
 
-		if (!pagevec_add(&pvec, page) || list_empty(list)) {
-			spin_unlock_irq(&zone->lru_lock);
-			if (buffer_heads_over_limit)
-				pagevec_strip(&pvec);
-			__pagevec_release(&pvec);
-			spin_lock_irq(&zone->lru_lock);
+		if (put_page_testzero(page)) {
+			__ClearPageLRU(page);
+			__ClearPageActive(page);
+			del_page_from_lru_list(zone, page, lru);
+
+			if (unlikely(PageCompound(page))) {
+				spin_unlock_irq(&zone->lru_lock);
+				(*get_compound_page_dtor(page))(page);
+				spin_lock_irq(&zone->lru_lock);
+			} else
+				list_add(&page->lru, pages_to_free);
 		}
 	}
 	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
@@ -1766,12 +1786,14 @@ static void shrink_active_list(unsigned long nr_pages,
 	 */
 	reclaim_stat->recent_rotated[file] += nr_rotated;
 
-	move_active_pages_to_lru(zone, &l_active,
+	move_active_pages_to_lru(zone, &l_active, &l_hold,
 						LRU_ACTIVE + file * LRU_FILE);
-	move_active_pages_to_lru(zone, &l_inactive,
+	move_active_pages_to_lru(zone, &l_inactive, &l_hold,
 						LRU_BASE   + file * LRU_FILE);
 	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
 	spin_unlock_irq(&zone->lru_lock);
+
+	free_hot_cold_page_list(&l_hold, 1);
 }
 
 #ifdef CONFIG_SWAP
-- 
cgit v1.2.3


From 5095ae83759f035c823fb375c6ed2de99c81d5ec Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 12 Jan 2012 17:19:58 -0800
Subject: mm: fewer underscores in ____pagevec_lru_add

What's so special about ____pagevec_lru_add() that it needs four leading
underscores?  Nothing, it just helped to distinguish from
__pagevec_lru_add() in 2.6.28 development.  Cut two leading underscores.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagevec.h | 10 +++++-----
 mm/swap.c               | 12 ++++++------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 9def9121f8a2..2aa12b8499c0 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -21,7 +21,7 @@ struct pagevec {
 };
 
 void __pagevec_release(struct pagevec *pvec);
-void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
+void __pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
 		pgoff_t start, unsigned nr_pages);
 unsigned pagevec_lookup_tag(struct pagevec *pvec,
@@ -66,22 +66,22 @@ static inline void pagevec_release(struct pagevec *pvec)
 
 static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
+	__pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
 }
 
 static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
+	__pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
 }
 
 static inline void __pagevec_lru_add_file(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
+	__pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
 }
 
 static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
+	__pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
 }
 
 static inline void pagevec_lru_add_file(struct pagevec *pvec)
diff --git a/mm/swap.c b/mm/swap.c
index 79c22a649a3e..e1cd623d9b2b 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -378,7 +378,7 @@ void __lru_cache_add(struct page *page, enum lru_list lru)
 
 	page_cache_get(page);
 	if (!pagevec_add(pvec, page))
-		____pagevec_lru_add(pvec, lru);
+		__pagevec_lru_add(pvec, lru);
 	put_cpu_var(lru_add_pvecs);
 }
 EXPORT_SYMBOL(__lru_cache_add);
@@ -506,7 +506,7 @@ static void drain_cpu_pagevecs(int cpu)
 	for_each_lru(lru) {
 		pvec = &pvecs[lru - LRU_BASE];
 		if (pagevec_count(pvec))
-			____pagevec_lru_add(pvec, lru);
+			__pagevec_lru_add(pvec, lru);
 	}
 
 	pvec = &per_cpu(lru_rotate_pvecs, cpu);
@@ -698,7 +698,7 @@ void lru_add_page_tail(struct zone* zone,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static void ____pagevec_lru_add_fn(struct page *page, void *arg)
+static void __pagevec_lru_add_fn(struct page *page, void *arg)
 {
 	enum lru_list lru = (enum lru_list)arg;
 	struct zone *zone = page_zone(page);
@@ -720,14 +720,14 @@ static void ____pagevec_lru_add_fn(struct page *page, void *arg)
  * Add the passed pages to the LRU, then drop the caller's refcount
  * on them.  Reinitialises the caller's pagevec.
  */
-void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
+void __pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 {
 	VM_BUG_ON(is_unevictable_lru(lru));
 
-	pagevec_lru_move_fn(pvec, ____pagevec_lru_add_fn, (void *)lru);
+	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, (void *)lru);
 }
 
-EXPORT_SYMBOL(____pagevec_lru_add);
+EXPORT_SYMBOL(__pagevec_lru_add);
 
 /**
  * pagevec_lookup - gang pagecache lookup
-- 
cgit v1.2.3


From 4111304dab198c687bc60f2e235a9f7ee92c47c8 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 12 Jan 2012 17:20:01 -0800
Subject: mm: enum lru_list lru

Mostly we use "enum lru_list lru": change those few "l"s to "lru"s.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_inline.h | 26 +++++++++++++-------------
 include/linux/mmzone.h    | 16 ++++++++--------
 mm/page_alloc.c           |  6 +++---
 mm/vmscan.c               | 22 +++++++++++-----------
 4 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 4e3478e71926..8f84d2e53d0f 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -22,21 +22,21 @@ static inline int page_is_file_cache(struct page *page)
 }
 
 static inline void
-add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list lru)
 {
 	struct lruvec *lruvec;
 
-	lruvec = mem_cgroup_lru_add_list(zone, page, l);
-	list_add(&page->lru, &lruvec->lists[l]);
-	__mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
+	lruvec = mem_cgroup_lru_add_list(zone, page, lru);
+	list_add(&page->lru, &lruvec->lists[lru]);
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, hpage_nr_pages(page));
 }
 
 static inline void
-del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list lru)
 {
-	mem_cgroup_lru_del_list(page, l);
+	mem_cgroup_lru_del_list(page, lru);
 	list_del(&page->lru);
-	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, -hpage_nr_pages(page));
 }
 
 /**
@@ -57,21 +57,21 @@ static inline enum lru_list page_lru_base_type(struct page *page)
 static inline void
 del_page_from_lru(struct zone *zone, struct page *page)
 {
-	enum lru_list l;
+	enum lru_list lru;
 
 	if (PageUnevictable(page)) {
 		__ClearPageUnevictable(page);
-		l = LRU_UNEVICTABLE;
+		lru = LRU_UNEVICTABLE;
 	} else {
-		l = page_lru_base_type(page);
+		lru = page_lru_base_type(page);
 		if (PageActive(page)) {
 			__ClearPageActive(page);
-			l += LRU_ACTIVE;
+			lru += LRU_ACTIVE;
 		}
 	}
-	mem_cgroup_lru_del_list(page, l);
+	mem_cgroup_lru_del_list(page, lru);
 	list_del(&page->lru);
-	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, -hpage_nr_pages(page));
 }
 
 /**
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2038b90ca6e3..650ba2fb3301 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -140,23 +140,23 @@ enum lru_list {
 	NR_LRU_LISTS
 };
 
-#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
+#define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++)
 
-#define for_each_evictable_lru(l) for (l = 0; l <= LRU_ACTIVE_FILE; l++)
+#define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++)
 
-static inline int is_file_lru(enum lru_list l)
+static inline int is_file_lru(enum lru_list lru)
 {
-	return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
+	return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE);
 }
 
-static inline int is_active_lru(enum lru_list l)
+static inline int is_active_lru(enum lru_list lru)
 {
-	return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
+	return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
 }
 
-static inline int is_unevictable_lru(enum lru_list l)
+static inline int is_unevictable_lru(enum lru_list lru)
 {
-	return (l == LRU_UNEVICTABLE);
+	return (lru == LRU_UNEVICTABLE);
 }
 
 struct lruvec {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cb5723c491f0..0027d8f4a1bb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4262,7 +4262,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize, memmap_pages;
-		enum lru_list l;
+		enum lru_list lru;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
 		realsize = size - zone_absent_pages_in_node(nid, j,
@@ -4312,8 +4312,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone->zone_pgdat = pgdat;
 
 		zone_pcp_init(zone);
-		for_each_lru(l)
-			INIT_LIST_HEAD(&zone->lruvec.lists[l]);
+		for_each_lru(lru)
+			INIT_LIST_HEAD(&zone->lruvec.lists[lru]);
 		zone->reclaim_stat.recent_rotated[0] = 0;
 		zone->reclaim_stat.recent_rotated[1] = 0;
 		zone->reclaim_stat.recent_scanned[0] = 0;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7724fb8e7498..01466bf783fd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1920,7 +1920,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
 	unsigned long ap, fp;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 	u64 fraction[2], denominator;
-	enum lru_list l;
+	enum lru_list lru;
 	int noswap = 0;
 	bool force_scan = false;
 
@@ -2010,18 +2010,18 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
 	fraction[1] = fp;
 	denominator = ap + fp + 1;
 out:
-	for_each_evictable_lru(l) {
-		int file = is_file_lru(l);
+	for_each_evictable_lru(lru) {
+		int file = is_file_lru(lru);
 		unsigned long scan;
 
-		scan = zone_nr_lru_pages(mz, l);
+		scan = zone_nr_lru_pages(mz, lru);
 		if (priority || noswap) {
 			scan >>= priority;
 			if (!scan && force_scan)
 				scan = SWAP_CLUSTER_MAX;
 			scan = div64_u64(scan * fraction[file], denominator);
 		}
-		nr[l] = scan;
+		nr[lru] = scan;
 	}
 }
 
@@ -2097,7 +2097,7 @@ static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
 {
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
-	enum lru_list l;
+	enum lru_list lru;
 	unsigned long nr_reclaimed, nr_scanned;
 	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
 	struct blk_plug plug;
@@ -2110,13 +2110,13 @@ restart:
 	blk_start_plug(&plug);
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 					nr[LRU_INACTIVE_FILE]) {
-		for_each_evictable_lru(l) {
-			if (nr[l]) {
+		for_each_evictable_lru(lru) {
+			if (nr[lru]) {
 				nr_to_scan = min_t(unsigned long,
-						   nr[l], SWAP_CLUSTER_MAX);
-				nr[l] -= nr_to_scan;
+						   nr[lru], SWAP_CLUSTER_MAX);
+				nr[lru] -= nr_to_scan;
 
-				nr_reclaimed += shrink_list(l, nr_to_scan,
+				nr_reclaimed += shrink_list(lru, nr_to_scan,
 							    mz, sc, priority);
 			}
 		}
-- 
cgit v1.2.3


From 1c1c53d43b387d02174911ecb42ce846577b0ea0 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 12 Jan 2012 17:20:04 -0800
Subject: mm: remove del_page_from_lru, add page_off_lru

del_page_from_lru() repeats del_page_from_lru_list(), also working out
which LRU the page was on, clearing the relevant bits.  Decouple those
functions: remove del_page_from_lru() and add page_off_lru().

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_inline.h | 15 +++++++++------
 mm/swap.c                 |  4 ++--
 2 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 8f84d2e53d0f..227fd3e9a9c9 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -54,8 +54,14 @@ static inline enum lru_list page_lru_base_type(struct page *page)
 	return LRU_INACTIVE_ANON;
 }
 
-static inline void
-del_page_from_lru(struct zone *zone, struct page *page)
+/**
+ * page_off_lru - which LRU list was page on? clearing its lru flags.
+ * @page: the page to test
+ *
+ * Returns the LRU list a page was on, as an index into the array of LRU
+ * lists; and clears its Unevictable or Active flags, ready for freeing.
+ */
+static inline enum lru_list page_off_lru(struct page *page)
 {
 	enum lru_list lru;
 
@@ -69,9 +75,7 @@ del_page_from_lru(struct zone *zone, struct page *page)
 			lru += LRU_ACTIVE;
 		}
 	}
-	mem_cgroup_lru_del_list(page, lru);
-	list_del(&page->lru);
-	__mod_zone_page_state(zone, NR_LRU_BASE + lru, -hpage_nr_pages(page));
+	return lru;
 }
 
 /**
@@ -92,7 +96,6 @@ static inline enum lru_list page_lru(struct page *page)
 		if (PageActive(page))
 			lru += LRU_ACTIVE;
 	}
-
 	return lru;
 }
 
diff --git a/mm/swap.c b/mm/swap.c
index 0d1b24b3fa87..b0f529b38979 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -53,7 +53,7 @@ static void __page_cache_release(struct page *page)
 		spin_lock_irqsave(&zone->lru_lock, flags);
 		VM_BUG_ON(!PageLRU(page));
 		__ClearPageLRU(page);
-		del_page_from_lru(zone, page);
+		del_page_from_lru_list(zone, page, page_off_lru(page));
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
 }
@@ -617,7 +617,7 @@ void release_pages(struct page **pages, int nr, int cold)
 			}
 			VM_BUG_ON(!PageLRU(page));
 			__ClearPageLRU(page);
-			del_page_from_lru(zone, page);
+			del_page_from_lru_list(zone, page, page_off_lru(page));
 		}
 
 		list_add(&page->lru, &pages_to_free);
-- 
cgit v1.2.3


From a3dd3323058d281abd584b15ad4c5b65064d7a61 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 12 Jan 2012 17:20:11 -0800
Subject: kexec: remove KMSG_DUMP_KEXEC

KMSG_DUMP_KEXEC is useless because we already save kernel messages inside
/proc/vmcore, and it is unsafe to allow modules to do other things in a
crash dump scenario.

[akpm@linux-foundation.org: fix powerpc build]
Signed-off-by: WANG Cong <xiyou.wangcong@gmail.com>
Reported-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Jarod Wilson <jarod@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/platforms/pseries/nvram.c | 1 -
 drivers/char/ramoops.c                 | 3 +--
 drivers/mtd/mtdoops.c                  | 3 +--
 include/linux/kmsg_dump.h              | 1 -
 kernel/kexec.c                         | 3 ---
 5 files changed, 2 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 330a57b7c17c..36f957f31842 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -638,7 +638,6 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
 		/* These are almost always orderly shutdowns. */
 		return;
 	case KMSG_DUMP_OOPS:
-	case KMSG_DUMP_KEXEC:
 		break;
 	case KMSG_DUMP_PANIC:
 		panicking = true;
diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c
index 7c7f42a1f880..feda90cdac9f 100644
--- a/drivers/char/ramoops.c
+++ b/drivers/char/ramoops.c
@@ -83,8 +83,7 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper,
 	struct timeval timestamp;
 
 	if (reason != KMSG_DUMP_OOPS &&
-	    reason != KMSG_DUMP_PANIC &&
-	    reason != KMSG_DUMP_KEXEC)
+	    reason != KMSG_DUMP_PANIC)
 		return;
 
 	/* Only dump oopses if dump_oops is set */
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index db8e8272d69b..3ce99e00a49e 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -315,8 +315,7 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper,
 	char *dst;
 
 	if (reason != KMSG_DUMP_OOPS &&
-	    reason != KMSG_DUMP_PANIC &&
-	    reason != KMSG_DUMP_KEXEC)
+	    reason != KMSG_DUMP_PANIC)
 		return;
 
 	/* Only dump oopses if dump_oops is set */
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index ee0c952188de..fee66317e071 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -18,7 +18,6 @@
 enum kmsg_dump_reason {
 	KMSG_DUMP_OOPS,
 	KMSG_DUMP_PANIC,
-	KMSG_DUMP_KEXEC,
 	KMSG_DUMP_RESTART,
 	KMSG_DUMP_HALT,
 	KMSG_DUMP_POWEROFF,
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 090ee10d9604..20ed47ae252f 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -32,7 +32,6 @@
 #include <linux/console.h>
 #include <linux/vmalloc.h>
 #include <linux/swap.h>
-#include <linux/kmsg_dump.h>
 #include <linux/syscore_ops.h>
 
 #include <asm/page.h>
@@ -1094,8 +1093,6 @@ void crash_kexec(struct pt_regs *regs)
 		if (kexec_crash_image) {
 			struct pt_regs fixed_regs;
 
-			kmsg_dump(KMSG_DUMP_KEXEC);
-
 			crash_setup_regs(&fixed_regs, regs);
 			crash_save_vmcoreinfo();
 			machine_crash_shutdown(&fixed_regs);
-- 
cgit v1.2.3


From 1f536b9e9f85456df93614b3c2f6a1a2b7d7cb9b Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Thu, 12 Jan 2012 17:20:20 -0800
Subject: include/linux/crash_dump.h needs elf.h

Building an ARM target we get the following warnings:

  CC      arch/arm/kernel/setup.o
  In file included from arch/arm/kernel/setup.c:39:
  arch/arm/include/asm/elf.h:102:1: warning: "vmcore_elf64_check_arch" redefined
  In file included from arch/arm/kernel/setup.c:24:
  include/linux/crash_dump.h:30:1: warning: this is the location of the previous definition

Quoting Russell King:

"linux/crash_dump.h makes no attempt to include asm/elf.h, but it depends
on stuff in asm/elf.h to determine how stuff inside this file is defined
at parse time.

So, if asm/elf.h is included after linux/crash_dump.h or not at all, you
get a different result from the situation where asm/elf.h is included
before."

So add elf.h header to crash_dump.h to avoid this problem.

The original discussion about this can be found at:
http://www.spinics.net/lists/arm-kernel/msg154113.html

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: <stable@vger.kernel.org>	[3.2.1]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/crash_dump.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 5c4abce94ad1..b936763f2236 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -5,6 +5,7 @@
 #include <linux/kexec.h>
 #include <linux/device.h>
 #include <linux/proc_fs.h>
+#include <linux/elf.h>
 
 #define ELFCORE_ADDR_MAX	(-1ULL)
 #define ELFCORE_ADDR_ERR	(-2ULL)
-- 
cgit v1.2.3


From 87192a2a49c475cf322cb143e0fa63b0102d8567 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 12 Jan 2012 17:20:34 -0800
Subject: vfs: cache request_queue in struct block_device

This makes it possible to get from the inode to the request_queue with one
less cache miss.  Used in followon optimization.

The lifetime of the pointer is the same as that of the gendisk.

This assumes that the queue will always stay the same in the gendisk while
it's visible to block_devices.  I think that's safe, correct?

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/block_dev.c     | 3 +++
 include/linux/fs.h | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index afe74dda632b..0e575d1304b4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1139,6 +1139,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
+		bdev->bd_queue = disk->queue;
 		bdev->bd_contains = bdev;
 		if (!partno) {
 			struct backing_dev_info *bdi;
@@ -1159,6 +1160,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 					disk_put_part(bdev->bd_part);
 					bdev->bd_part = NULL;
 					bdev->bd_disk = NULL;
+					bdev->bd_queue = NULL;
 					mutex_unlock(&bdev->bd_mutex);
 					disk_unblock_events(disk);
 					put_disk(disk);
@@ -1232,6 +1234,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	disk_put_part(bdev->bd_part);
 	bdev->bd_disk = NULL;
 	bdev->bd_part = NULL;
+	bdev->bd_queue = NULL;
 	bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
 	if (bdev != bdev->bd_contains)
 		__blkdev_put(bdev->bd_contains, mode, 1);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e694bd4434a4..4bc8169fb5a1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -660,6 +660,7 @@ struct address_space {
 	 * must be enforced here for CRIS, to let the least significant bit
 	 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
 	 */
+struct request_queue;
 
 struct block_device {
 	dev_t			bd_dev;  /* not a kdev_t - it's a search key */
@@ -682,6 +683,7 @@ struct block_device {
 	unsigned		bd_part_count;
 	int			bd_invalidated;
 	struct gendisk *	bd_disk;
+	struct request_queue *  bd_queue;
 	struct list_head	bd_list;
 	/*
 	 * Private data.  You must have bd_claim'ed the block_device
-- 
cgit v1.2.3


From 928da837aca77a9d3cb5076bf07b3224b1ba293b Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Thu, 12 Jan 2012 17:20:39 -0800
Subject: radix_tree: remove radix_tree_indirect_to_ptr()

It is not used anymore, remove it

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 9d4539c52e53..07e360b1b282 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -49,9 +49,6 @@
 #define RADIX_TREE_EXCEPTIONAL_ENTRY	2
 #define RADIX_TREE_EXCEPTIONAL_SHIFT	2
 
-#define radix_tree_indirect_to_ptr(ptr) \
-	radix_tree_indirect_to_ptr((void __force *)(ptr))
-
 static inline int radix_tree_is_indirect_ptr(void *ptr)
 {
 	return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR);
-- 
cgit v1.2.3


From 028ee4be34a09a6d48bdf30ab991ae933a7bc036 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 12 Jan 2012 17:20:55 -0800
Subject: c/r: prctl: add PR_SET_MM codes to set up mm_struct entries

When we restore a task we need to set up text, data and data heap sizes
from userspace to the values a task had at checkpoint time.  This patch
adds auxiliary prctl codes for that.

While most of them have a statistical nature (their values are involved
in the calculation of /proc/<pid>/statm output), the start_brk and brk
values are used to compute the allowed size of program data segment
expansion, which means an arbitrary change of these values might be a
dangerous operation.  So, to restrict access, the following requirements
apply to prctl calls:

 - The process has to have CAP_SYS_ADMIN capability granted.
 - For all opcodes except start_brk/brk members an appropriate
   VMA area must exist and should fit certain VMA flags,
   such as:
   - code segment must be executable but not writable;
   - data segment must not be executable.

start_brk/brk values must not intersect with data segment and must not
exceed RLIMIT_DATA resource limit.

Still the main guard is CAP_SYS_ADMIN capability check.

Note the kernel should be compiled with CONFIG_CHECKPOINT_RESTORE support
otherwise these prctl calls will return -EINVAL.

[akpm@linux-foundation.org: cache current->mm in a local, saving 200 bytes text]
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Vasiliy Kulikov <segoon@openwall.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/prctl.h |  12 +++++
 kernel/sys.c          | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index a3baeb2c2161..7ddc7f1b480f 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -102,4 +102,16 @@
 
 #define PR_MCE_KILL_GET 34
 
+/*
+ * Tune up process memory map specifics.
+ */
+#define PR_SET_MM		35
+# define PR_SET_MM_START_CODE		1
+# define PR_SET_MM_END_CODE		2
+# define PR_SET_MM_START_DATA		3
+# define PR_SET_MM_END_DATA		4
+# define PR_SET_MM_START_STACK		5
+# define PR_SET_MM_START_BRK		6
+# define PR_SET_MM_BRK			7
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index ddf8155bf3f8..40701538fbd1 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1692,6 +1692,124 @@ SYSCALL_DEFINE1(umask, int, mask)
 	return mask;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static int prctl_set_mm(int opt, unsigned long addr,
+			unsigned long arg4, unsigned long arg5)
+{
+	unsigned long rlim = rlimit(RLIMIT_DATA);
+	unsigned long vm_req_flags;
+	unsigned long vm_bad_flags;
+	struct vm_area_struct *vma;
+	int error = 0;
+	struct mm_struct *mm = current->mm;
+
+	if (arg4 | arg5)
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (addr >= TASK_SIZE)
+		return -EINVAL;
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, addr);
+
+	if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) {
+		/* It must be existing VMA */
+		if (!vma || vma->vm_start > addr)
+			goto out;
+	}
+
+	error = -EINVAL;
+	switch (opt) {
+	case PR_SET_MM_START_CODE:
+	case PR_SET_MM_END_CODE:
+		vm_req_flags = VM_READ | VM_EXEC;
+		vm_bad_flags = VM_WRITE | VM_MAYSHARE;
+
+		if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
+		    (vma->vm_flags & vm_bad_flags))
+			goto out;
+
+		if (opt == PR_SET_MM_START_CODE)
+			mm->start_code = addr;
+		else
+			mm->end_code = addr;
+		break;
+
+	case PR_SET_MM_START_DATA:
+	case PR_SET_MM_END_DATA:
+		vm_req_flags = VM_READ | VM_WRITE;
+		vm_bad_flags = VM_EXEC | VM_MAYSHARE;
+
+		if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
+		    (vma->vm_flags & vm_bad_flags))
+			goto out;
+
+		if (opt == PR_SET_MM_START_DATA)
+			mm->start_data = addr;
+		else
+			mm->end_data = addr;
+		break;
+
+	case PR_SET_MM_START_STACK:
+
+#ifdef CONFIG_STACK_GROWSUP
+		vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP;
+#else
+		vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
+#endif
+		if ((vma->vm_flags & vm_req_flags) != vm_req_flags)
+			goto out;
+
+		mm->start_stack = addr;
+		break;
+
+	case PR_SET_MM_START_BRK:
+		if (addr <= mm->end_data)
+			goto out;
+
+		if (rlim < RLIM_INFINITY &&
+		    (mm->brk - addr) +
+		    (mm->end_data - mm->start_data) > rlim)
+			goto out;
+
+		mm->start_brk = addr;
+		break;
+
+	case PR_SET_MM_BRK:
+		if (addr <= mm->end_data)
+			goto out;
+
+		if (rlim < RLIM_INFINITY &&
+		    (addr - mm->start_brk) +
+		    (mm->end_data - mm->start_data) > rlim)
+			goto out;
+
+		mm->brk = addr;
+		break;
+
+	default:
+		error = -EINVAL;
+		goto out;
+	}
+
+	error = 0;
+
+out:
+	up_read(&mm->mmap_sem);
+
+	return error;
+}
+#else /* CONFIG_CHECKPOINT_RESTORE */
+static int prctl_set_mm(int opt, unsigned long addr,
+			unsigned long arg4, unsigned long arg5)
+{
+	return -EINVAL;
+}
+#endif
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		unsigned long, arg4, unsigned long, arg5)
 {
@@ -1841,6 +1959,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			else
 				error = PR_MCE_KILL_DEFAULT;
 			break;
+		case PR_SET_MM:
+			error = prctl_set_mm(arg2, arg3, arg4, arg5);
+			break;
 		default:
 			error = -EINVAL;
 			break;
-- 
cgit v1.2.3


From 6898e3bd11cc9a931ef115eee9000ac9d8f8c3cf Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 13 Jan 2012 08:15:33 +0100
Subject: block: Stop using macro stubs for the bio data integrity calls

Replace preprocessor macro stubs with real function declarations to
prevent warnings when CONFIG_BLK_DEV_INTEGRITY is disabled.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 66 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 53 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 847994aef0e9..129a9c097958 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -515,24 +515,64 @@ extern void bio_integrity_init(void);
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
-#define bio_integrity(a)		(0)
-#define bioset_integrity_create(a, b)	(0)
-#define bio_integrity_prep(a)		(0)
-#define bio_integrity_enabled(a)	(0)
+static inline int bio_integrity(struct bio *bio)
+{
+	return 0;
+}
+
+static inline int bio_integrity_enabled(struct bio *bio)
+{
+	return 0;
+}
+
+static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
+{
+	return 0;
+}
+
+static inline void bioset_integrity_free (struct bio_set *bs)
+{
+	return;
+}
+
+static inline int bio_integrity_prep(struct bio *bio)
+{
+	return 0;
+}
+
+static inline void bio_integrity_free(struct bio *bio, struct bio_set *bs)
+{
+	return;
+}
+
 static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
 				      gfp_t gfp_mask, struct bio_set *bs)
 {
 	return 0;
 }
-#define bioset_integrity_free(a)	do { } while (0)
-#define bio_integrity_free(a, b)	do { } while (0)
-#define bio_integrity_endio(a, b)	do { } while (0)
-#define bio_integrity_advance(a, b)	do { } while (0)
-#define bio_integrity_trim(a, b, c)	do { } while (0)
-#define bio_integrity_split(a, b, c)	do { } while (0)
-#define bio_integrity_set_tag(a, b, c)	do { } while (0)
-#define bio_integrity_get_tag(a, b, c)	do { } while (0)
-#define bio_integrity_init(a)		do { } while (0)
+
+static inline void bio_integrity_split(struct bio *bio, struct bio_pair *bp,
+				       int sectors)
+{
+	return;
+}
+
+static inline void bio_integrity_advance(struct bio *bio,
+					 unsigned int bytes_done)
+{
+	return;
+}
+
+static inline void bio_integrity_trim(struct bio *bio, unsigned int offset,
+				      unsigned int sectors)
+{
+	return;
+}
+
+static inline void bio_integrity_init(void)
+{
+	return;
+}
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
-- 
cgit v1.2.3


From 7c7c7f01cc5e3e423120a4848a73dd5e4586f2f9 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 11 Jan 2012 19:30:38 +0000
Subject: vhost-net: add module alias (v2.1)

By adding some module aliases, programs (or users) won't have to explicitly
call modprobe. Vhost-net will always be available if built into the kernel.
It does require assigning a permanent minor number for depmod to work.

Also:
  - use C99 style initialization.
  - add missing entry in documentation for loop-control

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-By: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devices.txt  | 3 +++
 drivers/vhost/net.c        | 8 +++++---
 include/linux/miscdevice.h | 1 +
 3 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devices.txt b/Documentation/devices.txt
index cec8864ce4e8..00383186d8fb 100644
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -447,6 +447,9 @@ Your cooperation is appreciated.
 		234 = /dev/btrfs-control	Btrfs control device
 		235 = /dev/autofs	Autofs control device
 		236 = /dev/mapper/control	Device-Mapper control device
+		237 = /dev/loop-control Loopback control device
+		238 = /dev/vhost-net	Host kernel accelerator for virtio net
+
 		240-254			Reserved for local use
 		255			Reserved for MISC_DYNAMIC_MINOR
 
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 882a51fe7b3c..9dab1f51dd43 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -856,9 +856,9 @@ static const struct file_operations vhost_net_fops = {
 };
 
 static struct miscdevice vhost_net_misc = {
-	MISC_DYNAMIC_MINOR,
-	"vhost-net",
-	&vhost_net_fops,
+	.minor = VHOST_NET_MINOR,
+	.name = "vhost-net",
+	.fops = &vhost_net_fops,
 };
 
 static int vhost_net_init(void)
@@ -879,3 +879,5 @@ MODULE_VERSION("0.0.1");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Michael S. Tsirkin");
 MODULE_DESCRIPTION("Host kernel accelerator for virtio net");
+MODULE_ALIAS_MISCDEV(VHOST_NET_MINOR);
+MODULE_ALIAS("devname:vhost-net");
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 32085249e9cb..0549d2115507 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -42,6 +42,7 @@
 #define AUTOFS_MINOR		235
 #define MAPPER_CTRL_MINOR	236
 #define LOOP_CTRL_MINOR		237
+#define VHOST_NET_MINOR		238
 #define MISC_DYNAMIC_MINOR	255
 
 struct device;
-- 
cgit v1.2.3


From 577ebb374c78314ac4617242f509e2f5e7156649 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 12 Jan 2012 16:01:27 +0100
Subject: block: add and use scsi_cmd_blk_ioctl

Introduce a wrapper around scsi_cmd_ioctl that takes a block device.

The function will then be enhanced to detect partition block devices
and, in that case, subject the ioctls to whitelisting.

Cc: linux-scsi@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Cc: James Bottomley <JBottomley@parallels.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 block/scsi_ioctl.c             | 7 +++++++
 drivers/block/cciss.c          | 6 +++---
 drivers/block/ub.c             | 3 +--
 drivers/block/virtio_blk.c     | 4 ++--
 drivers/cdrom/cdrom.c          | 3 +--
 drivers/ide/ide-floppy_ioctl.c | 3 +--
 drivers/scsi/sd.c              | 2 +-
 include/linux/blkdev.h         | 2 ++
 8 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index fbdf0d802ec4..a2c11f330872 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -690,6 +690,13 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
 }
 EXPORT_SYMBOL(scsi_cmd_ioctl);
 
+int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
+		       unsigned int cmd, void __user *arg)
+{
+	return scsi_cmd_ioctl(bd->bd_disk->queue, bd->bd_disk, mode, cmd, arg);
+}
+EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
+
 static int __init blk_scsi_ioctl_init(void)
 {
 	blk_set_cmd_filter_defaults(&blk_default_cmd_filter);
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 587cce57adae..b0f553b26d0f 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1735,7 +1735,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 	case CCISS_BIG_PASSTHRU:
 		return cciss_bigpassthru(h, argp);
 
-	/* scsi_cmd_ioctl handles these, below, though some are not */
+	/* scsi_cmd_blk_ioctl handles these, below, though some are not */
 	/* very meaningful for cciss.  SG_IO is the main one people want. */
 
 	case SG_GET_VERSION_NUM:
@@ -1746,9 +1746,9 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 	case SG_EMULATED_HOST:
 	case SG_IO:
 	case SCSI_IOCTL_SEND_COMMAND:
-		return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
+		return scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
 
-	/* scsi_cmd_ioctl would normally handle these, below, but */
+	/* scsi_cmd_blk_ioctl would normally handle these, below, but */
 	/* they aren't a good fit for cciss, as CD-ROMs are */
 	/* not supported, and we don't have any bus/target/lun */
 	/* which we present to the kernel. */
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 0e376d46bdd1..7333b9e44411 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -1744,12 +1744,11 @@ static int ub_bd_release(struct gendisk *disk, fmode_t mode)
 static int ub_bd_ioctl(struct block_device *bdev, fmode_t mode,
     unsigned int cmd, unsigned long arg)
 {
-	struct gendisk *disk = bdev->bd_disk;
 	void __user *usermem = (void __user *) arg;
 	int ret;
 
 	mutex_lock(&ub_mutex);
-	ret = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, usermem);
+	ret = scsi_cmd_blk_ioctl(bdev, mode, cmd, usermem);
 	mutex_unlock(&ub_mutex);
 
 	return ret;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index ffd5ca919295..c4a60badf252 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -250,8 +250,8 @@ static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
 		return -ENOTTY;
 
-	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
-			      (void __user *)data);
+	return scsi_cmd_blk_ioctl(bdev, mode, cmd,
+				  (void __user *)data);
 }
 
 /* We provide getgeo only to please some old bootloader/partitioning tools */
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 1bbf7645a97c..55eaf474d32c 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2746,12 +2746,11 @@ int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
 {
 	void __user *argp = (void __user *)arg;
 	int ret;
-	struct gendisk *disk = bdev->bd_disk;
 
 	/*
 	 * Try the generic SCSI command ioctl's first.
 	 */
-	ret = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
+	ret = scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
 	if (ret != -ENOTTY)
 		return ret;
 
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index d267b7affad6..a22ca8467010 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c
@@ -292,8 +292,7 @@ int ide_floppy_ioctl(ide_drive_t *drive, struct block_device *bdev,
 	 * and CDROM_SEND_PACKET (legacy) ioctls
 	 */
 	if (cmd != CDROM_SEND_PACKET && cmd != SCSI_IOCTL_SEND_COMMAND)
-		err = scsi_cmd_ioctl(bdev->bd_disk->queue, bdev->bd_disk,
-				mode, cmd, argp);
+		err = scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
 
 	if (err == -ENOTTY)
 		err = generic_ide_ioctl(drive, bdev, cmd, arg);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 7b3f8075e2a5..b4d57bb04c72 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1097,7 +1097,7 @@ static int sd_ioctl(struct block_device *bdev, fmode_t mode,
 			error = scsi_ioctl(sdp, cmd, p);
 			break;
 		default:
-			error = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, p);
+			error = scsi_cmd_blk_ioctl(bdev, mode, cmd, p);
 			if (error != -ENOTTY)
 				break;
 			error = scsi_ioctl(sdp, cmd, p);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 94acd8172b5b..ca7b869508c7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -675,6 +675,8 @@ extern int blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
+extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
+			      unsigned int, void __user *);
 extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 			  unsigned int, void __user *);
 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
-- 
cgit v1.2.3


From 0bfc96cb77224736dfa35c3c555d37b3646ef35e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 12 Jan 2012 16:01:28 +0100
Subject: block: fail SCSI passthrough ioctls on partition devices

Linux allows executing the SG_IO ioctl on a partition or LVM volume, and
will pass the command to the underlying block device.  This is
well-known, but it is also a large security problem when (via Unix
permissions, ACLs, SELinux or a combination thereof) a program or user
needs to be granted access only to part of the disk.

This patch lets partitions forward a small set of harmless ioctls;
others are logged with printk so that we can see which ioctls are
actually sent.  In my tests only CDROM_GET_CAPABILITY actually occurred.
Of course it was being sent to a (partition on a) hard disk, so it would
have failed with ENOTTY and the patch isn't changing anything in
practice.  Still, I'm treating it specially to avoid spamming the logs.

In principle, this restriction should include programs running with
CAP_SYS_RAWIO.  If for example I let a program access /dev/sda2 and
/dev/sdb, it still should not be able to read/write outside the
boundaries of /dev/sda2 independent of the capabilities.  However, for
now programs with CAP_SYS_RAWIO will still be allowed to send the
ioctls.  Their actions will still be logged.

This patch does not affect the non-libata IDE driver.  That driver
however already tests for bd != bd->bd_contains before issuing some
ioctl; it could be restricted further to forbid these ioctls even for
programs running with CAP_SYS_ADMIN/CAP_SYS_RAWIO.

Cc: linux-scsi@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Cc: James Bottomley <JBottomley@parallels.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[ Make it also print the command name when warning - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 block/scsi_ioctl.c     | 45 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/sd.c      | 11 +++++++++--
 include/linux/blkdev.h |  1 +
 3 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index a2c11f330872..260fa80ef575 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -24,6 +24,7 @@
 #include <linux/capability.h>
 #include <linux/completion.h>
 #include <linux/cdrom.h>
+#include <linux/ratelimit.h>
 #include <linux/slab.h>
 #include <linux/times.h>
 #include <asm/uaccess.h>
@@ -690,9 +691,53 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
 }
 EXPORT_SYMBOL(scsi_cmd_ioctl);
 
+int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd)
+{
+	if (bd && bd == bd->bd_contains)
+		return 0;
+
+	/* Actually none of these is particularly useful on a partition,
+	 * but they are safe.
+	 */
+	switch (cmd) {
+	case SCSI_IOCTL_GET_IDLUN:
+	case SCSI_IOCTL_GET_BUS_NUMBER:
+	case SCSI_IOCTL_GET_PCI:
+	case SCSI_IOCTL_PROBE_HOST:
+	case SG_GET_VERSION_NUM:
+	case SG_SET_TIMEOUT:
+	case SG_GET_TIMEOUT:
+	case SG_GET_RESERVED_SIZE:
+	case SG_SET_RESERVED_SIZE:
+	case SG_EMULATED_HOST:
+		return 0;
+	case CDROM_GET_CAPABILITY:
+		/* Keep this until we remove the printk below.  udev sends it
+		 * and we do not want to spam dmesg about it.   CD-ROMs do
+		 * not have partitions, so we get here only for disks.
+		 */
+		return -ENOIOCTLCMD;
+	default:
+		break;
+	}
+
+	/* In particular, rule out all resets and host-specific ioctls.  */
+	printk_ratelimited(KERN_WARNING
+			   "%s: sending ioctl %x to a partition!\n", current->comm, cmd);
+
+	return capable(CAP_SYS_RAWIO) ? 0 : -ENOIOCTLCMD;
+}
+EXPORT_SYMBOL(scsi_verify_blk_ioctl);
+
 int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
 		       unsigned int cmd, void __user *arg)
 {
+	int ret;
+
+	ret = scsi_verify_blk_ioctl(bd, cmd);
+	if (ret < 0)
+		return ret;
+
 	return scsi_cmd_ioctl(bd->bd_disk->queue, bd->bd_disk, mode, cmd, arg);
 }
 EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b4d57bb04c72..c691fb50e6cb 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1075,6 +1075,10 @@ static int sd_ioctl(struct block_device *bdev, fmode_t mode,
 	SCSI_LOG_IOCTL(1, sd_printk(KERN_INFO, sdkp, "sd_ioctl: disk=%s, "
 				    "cmd=0x%x\n", disk->disk_name, cmd));
 
+	error = scsi_verify_blk_ioctl(bdev, cmd);
+	if (error < 0)
+		return error;
+
 	/*
 	 * If we are in the middle of error recovery, don't let anyone
 	 * else try and use this device.  Also, if error recovery fails, it
@@ -1267,6 +1271,11 @@ static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode,
 			   unsigned int cmd, unsigned long arg)
 {
 	struct scsi_device *sdev = scsi_disk(bdev->bd_disk)->device;
+	int ret;
+
+	ret = scsi_verify_blk_ioctl(bdev, cmd);
+	if (ret < 0)
+		return ret;
 
 	/*
 	 * If we are in the middle of error recovery, don't let anyone
@@ -1278,8 +1287,6 @@ static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode,
 		return -ENODEV;
 	       
 	if (sdev->host->hostt->compat_ioctl) {
-		int ret;
-
 		ret = sdev->host->hostt->compat_ioctl(sdev, cmd, (void __user *)arg);
 
 		return ret;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ca7b869508c7..0ed1eb062313 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -675,6 +675,7 @@ extern int blk_insert_cloned_request(struct request_queue *q,
 				     struct request *rq);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
+extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
 extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
 			      unsigned int, void __user *);
 extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
-- 
cgit v1.2.3


From 9bf04646b0b41c5438ed8a27c5f8dbe0ff40d756 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 15 Jan 2012 16:57:12 +0100
Subject: netfilter: revert user-space expectation helper support

This patch partially reverts:
3d058d7 netfilter: rework user-space expectation helper support
that was applied during the 3.2 development cycle.

After this patch, the tree remains just like before patch bc01bef,
that initially added the preliminary infrastructure.

I decided to partially revert this patch because the approach
that I proposed to resolve this problem is broken in NAT setups.
Moreover, a new infrastructure will be submitted for the 3.3.x
development cycle that resolves the existing issues while
providing a neat solution.

Since nobody has been seriously using this infrastructure in
user-space, the removal of this feature should not affect any known
FOSS project (to my knowledge).

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h |  4 ----
 include/linux/netfilter/xt_CT.h               |  3 +--
 net/netfilter/nf_conntrack_helper.c           | 12 ------------
 net/netfilter/nf_conntrack_netlink.c          |  4 ----
 net/netfilter/xt_CT.c                         |  8 +++-----
 5 files changed, 4 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 9e3a2838291b..0d3dd66322ec 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -83,10 +83,6 @@ enum ip_conntrack_status {
 	/* Conntrack is a fake untracked entry */
 	IPS_UNTRACKED_BIT = 12,
 	IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
-
-	/* Conntrack has a userspace helper. */
-	IPS_USERSPACE_HELPER_BIT = 13,
-	IPS_USERSPACE_HELPER = (1 << IPS_USERSPACE_HELPER_BIT),
 };
 
 /* Connection tracking event types */
diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h
index 6390f0992f36..b56e76811c04 100644
--- a/include/linux/netfilter/xt_CT.h
+++ b/include/linux/netfilter/xt_CT.h
@@ -3,8 +3,7 @@
 
 #include <linux/types.h>
 
-#define XT_CT_NOTRACK		0x1
-#define XT_CT_USERSPACE_HELPER	0x2
+#define XT_CT_NOTRACK	0x1
 
 struct xt_ct_target_info {
 	__u16 flags;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 299fec91f741..bbe23baa19b6 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -121,18 +121,6 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
 	int ret = 0;
 
 	if (tmpl != NULL) {
-		/* we've got a userspace helper. */
-		if (tmpl->status & IPS_USERSPACE_HELPER) {
-			help = nf_ct_helper_ext_add(ct, flags);
-			if (help == NULL) {
-				ret = -ENOMEM;
-				goto out;
-			}
-			rcu_assign_pointer(help->helper, NULL);
-			__set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
-			ret = 0;
-			goto out;
-		}
 		help = nfct_help(tmpl);
 		if (help != NULL)
 			helper = help->helper;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2a4834b83332..9307b033c0c9 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2042,10 +2042,6 @@ ctnetlink_create_expect(struct net *net, u16 zone,
 	}
 	help = nfct_help(ct);
 	if (!help) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
-	if (test_bit(IPS_USERSPACE_HELPER_BIT, &ct->status)) {
 		if (!cda[CTA_EXPECT_TIMEOUT]) {
 			err = -EINVAL;
 			goto out;
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 8e87123f1373..0221d10de75a 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -62,8 +62,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 	int ret = 0;
 	u8 proto;
 
-	if (info->flags & ~(XT_CT_NOTRACK | XT_CT_USERSPACE_HELPER))
-		return -EOPNOTSUPP;
+	if (info->flags & ~XT_CT_NOTRACK)
+		return -EINVAL;
 
 	if (info->flags & XT_CT_NOTRACK) {
 		ct = nf_ct_untracked_get();
@@ -92,9 +92,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 				  GFP_KERNEL))
 		goto err3;
 
-	if (info->flags & XT_CT_USERSPACE_HELPER) {
-		__set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
-	} else if (info->helper[0]) {
+	if (info->helper[0]) {
 		ret = -ENOENT;
 		proto = xt_ct_find_proto(par);
 		if (!proto) {
-- 
cgit v1.2.3


From 7061ca3b6c99fc78115560b9a10227c8c5fafc45 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 20 Dec 2011 08:20:46 -0800
Subject: sched: Add "const" to is_idle_task() parameter

This patch fixes a build warning in -next due to a const pointer being
passed to is_idle_task().  Because is_idle_task() does not modify anything,
this commit adds the "const" to is_idle_task()'s argument declaration.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4a7e4d333a27..56fa25a5b1eb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2074,7 +2074,7 @@ extern struct task_struct *idle_task(int cpu);
  * is_idle_task - is the specified task an idle task?
  * @tsk: the task in question.
  */
-static inline bool is_idle_task(struct task_struct *p)
+static inline bool is_idle_task(const struct task_struct *p)
 {
 	return p->pid == 0;
 }
-- 
cgit v1.2.3


From b54ac6d2a25084667da781c7ca2cebef52a2bcdd Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Thu, 8 Dec 2011 11:25:49 +0800
Subject: ACPI, Record ACPI NVS regions

Some firmware will access memory in the ACPI NVS region via APEI.  That
is, instructions in the APEI ERST/EINJ tables will read/write the ACPI
NVS region.  The original resource conflict checking in the APEI code
checks memory/ioport accessed by APEI via the general resource management
mechanism.  But the ACPI NVS region is already marked as busy, so the
false resource conflict prevents APEI ERST/EINJ from working.

To fix this, this patch records ACPI NVS regions, so that we can avoid
requesting resources for memory regions inside them.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/e820.c |  4 ++--
 drivers/acpi/Makefile  |  3 ++-
 drivers/acpi/nvs.c     | 53 +++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/acpi.h   | 20 +++++++++++++------
 4 files changed, 70 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 303a0e48f076..51c3b186e5b9 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -714,7 +714,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
 }
 #endif
 
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_ACPI
 /**
  * Mark ACPI NVS memory region, so that we can save/restore it during
  * hibernation and the subsequent resume.
@@ -727,7 +727,7 @@ static int __init e820_mark_nvs_memory(void)
 		struct e820entry *ei = &e820.map[i];
 
 		if (ei->type == E820_NVS)
-			suspend_nvs_register(ei->addr, ei->size);
+			acpi_nvs_register(ei->addr, ei->size);
 	}
 
 	return 0;
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index ecb26b4f29a0..c07f44f05f9d 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -20,11 +20,12 @@ obj-y				+= acpi.o \
 # All the builtin files are in the "acpi." module_param namespace.
 acpi-y				+= osl.o utils.o reboot.o
 acpi-y				+= atomicio.o
+acpi-y				+= nvs.o
 
 # sleep related files
 acpi-y				+= wakeup.o
 acpi-y				+= sleep.o
-acpi-$(CONFIG_ACPI_SLEEP)	+= proc.o nvs.o
+acpi-$(CONFIG_ACPI_SLEEP)	+= proc.o
 
 
 #
diff --git a/drivers/acpi/nvs.c b/drivers/acpi/nvs.c
index 096787b43c96..7a2035fa8c71 100644
--- a/drivers/acpi/nvs.c
+++ b/drivers/acpi/nvs.c
@@ -15,6 +15,56 @@
 #include <linux/acpi_io.h>
 #include <acpi/acpiosxf.h>
 
+/* ACPI NVS regions, APEI may use it */
+
+struct nvs_region {
+	__u64 phys_start;
+	__u64 size;
+	struct list_head node;
+};
+
+static LIST_HEAD(nvs_region_list);
+
+#ifdef CONFIG_ACPI_SLEEP
+static int suspend_nvs_register(unsigned long start, unsigned long size);
+#else
+static inline int suspend_nvs_register(unsigned long a, unsigned long b)
+{
+	return 0;
+}
+#endif
+
+int acpi_nvs_register(__u64 start, __u64 size)
+{
+	struct nvs_region *region;
+
+	region = kmalloc(sizeof(*region), GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+	region->phys_start = start;
+	region->size = size;
+	list_add_tail(&region->node, &nvs_region_list);
+
+	return suspend_nvs_register(start, size);
+}
+
+int acpi_nvs_for_each_region(int (*func)(__u64 start, __u64 size, void *data),
+			     void *data)
+{
+	int rc;
+	struct nvs_region *region;
+
+	list_for_each_entry(region, &nvs_region_list, node) {
+		rc = func(region->phys_start, region->size, data);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+
+#ifdef CONFIG_ACPI_SLEEP
 /*
  * Platforms, like ACPI, may want us to save some memory used by them during
  * suspend and to restore the contents of this memory during the subsequent
@@ -41,7 +91,7 @@ static LIST_HEAD(nvs_list);
  *	things so that the data from page-aligned addresses in this region will
  *	be copied into separate RAM pages.
  */
-int suspend_nvs_register(unsigned long start, unsigned long size)
+static int suspend_nvs_register(unsigned long start, unsigned long size)
 {
 	struct nvs_page *entry, *next;
 
@@ -159,3 +209,4 @@ void suspend_nvs_restore(void)
 		if (entry->data)
 			memcpy(entry->kaddr, entry->data, entry->size);
 }
+#endif
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6001b4da39dd..26b75442ff7a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -306,6 +306,11 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
 					     u32 *mask, u32 req);
 extern void acpi_early_init(void);
 
+extern int acpi_nvs_register(__u64 start, __u64 size);
+
+extern int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *),
+				    void *data);
+
 #else	/* !CONFIG_ACPI */
 
 #define acpi_disabled 1
@@ -348,15 +353,18 @@ static inline int acpi_table_parse(char *id,
 {
 	return -1;
 }
-#endif	/* !CONFIG_ACPI */
 
-#ifdef CONFIG_ACPI_SLEEP
-int suspend_nvs_register(unsigned long start, unsigned long size);
-#else
-static inline int suspend_nvs_register(unsigned long a, unsigned long b)
+static inline int acpi_nvs_register(__u64 start, __u64 size)
 {
 	return 0;
 }
-#endif
+
+static inline int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *),
+					   void *data)
+{
+	return 0;
+}
+
+#endif	/* !CONFIG_ACPI */
 
 #endif	/*_LINUX_ACPI_H*/
-- 
cgit v1.2.3


From 6f68c91c55ea3576d366797fa8d45e31c4aa79f8 Mon Sep 17 00:00:00 2001
From: Myron Stowe <mstowe@redhat.com>
Date: Mon, 7 Nov 2011 16:23:34 -0700
Subject: ACPI: Export interfaces for ioremapping/iounmapping ACPI registers

Export remapping and unmapping interfaces - acpi_os_map_generic_address()
and acpi_os_unmap_generic_address() - for ACPI generic registers that are
backed by memory mapped I/O (MMIO).

The acpi_os_map_generic_address() and acpi_os_unmap_generic_address()
declarations may more properly belong in include/acpi/acpiosxf.h next to
acpi_os_read_memory() but I believe that would require the ACPI CA making
them an official part of the ACPI CA - OS interface.

ACPI Generic Address Structure (GAS) reference (ACPI's fixed/generic
hardware registers use the GAS format):
  ACPI Specification, Revision 4.0, Section 5.2.3.1, "Generic Address
  Structure"

Signed-off-by: Myron Stowe <myron.stowe@redhat.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/osl.c      | 6 ++++--
 include/linux/acpi_io.h | 3 +++
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 2e285cdbefb1..b11f2676f7c9 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -431,7 +431,7 @@ void __init early_acpi_os_unmap_memory(void __iomem *virt, acpi_size size)
 		__acpi_unmap_table(virt, size);
 }
 
-static int acpi_os_map_generic_address(struct acpi_generic_address *gas)
+int acpi_os_map_generic_address(struct acpi_generic_address *gas)
 {
 	u64 addr;
 	void __iomem *virt;
@@ -450,8 +450,9 @@ static int acpi_os_map_generic_address(struct acpi_generic_address *gas)
 
 	return 0;
 }
+EXPORT_SYMBOL(acpi_os_map_generic_address);
 
-static void acpi_os_unmap_generic_address(struct acpi_generic_address *gas)
+void acpi_os_unmap_generic_address(struct acpi_generic_address *gas)
 {
 	u64 addr;
 	struct acpi_ioremap *map;
@@ -475,6 +476,7 @@ static void acpi_os_unmap_generic_address(struct acpi_generic_address *gas)
 
 	acpi_os_map_cleanup(map);
 }
+EXPORT_SYMBOL(acpi_os_unmap_generic_address);
 
 #ifdef ACPI_FUTURE_USAGE
 acpi_status
diff --git a/include/linux/acpi_io.h b/include/linux/acpi_io.h
index 4afd7102459d..b0ffa219993e 100644
--- a/include/linux/acpi_io.h
+++ b/include/linux/acpi_io.h
@@ -12,4 +12,7 @@ static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
 
 void __iomem *acpi_os_get_iomem(acpi_physical_address phys, unsigned int size);
 
+int acpi_os_map_generic_address(struct acpi_generic_address *addr);
+void acpi_os_unmap_generic_address(struct acpi_generic_address *addr);
+
 #endif
-- 
cgit v1.2.3


From 7d5869e78f4c9d32f834dadefbb7dcb3c9d4d85f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 13 Jan 2012 23:58:41 +0100
Subject: bcma: connect the bcma bus suspend/resume to the bcma driver
 suspend/resume

Now the low-level driver actually gets informed that it is getting suspended and resumed.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/main.c                                   | 19 +++++++++++++++++++
 drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c |  2 +-
 include/linux/bcma/bcma.h                             |  2 +-
 3 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index b711d9d634a7..febbc0a1222a 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -243,6 +243,16 @@ int __init bcma_bus_early_register(struct bcma_bus *bus,
 #ifdef CONFIG_PM
 int bcma_bus_suspend(struct bcma_bus *bus)
 {
+	struct bcma_device *core;
+
+	list_for_each_entry(core, &bus->cores, list) {
+		struct device_driver *drv = core->dev.driver;
+		if (drv) {
+			struct bcma_driver *adrv = container_of(drv, struct bcma_driver, drv);
+			if (adrv->suspend)
+				adrv->suspend(core);
+		}
+	}
 	return 0;
 }
 
@@ -257,6 +267,15 @@ int bcma_bus_resume(struct bcma_bus *bus)
 		bcma_core_chipcommon_init(&bus->drv_cc);
 	}
 
+	list_for_each_entry(core, &bus->cores, list) {
+		struct device_driver *drv = core->dev.driver;
+		if (drv) {
+			struct bcma_driver *adrv = container_of(drv, struct bcma_driver, drv);
+			if (adrv->resume)
+				adrv->resume(core);
+		}
+	}
+
 	return 0;
 }
 #endif
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
index d106576ce338..213130afdaf7 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
@@ -1135,7 +1135,7 @@ static int brcms_pci_suspend(struct pci_dev *pdev)
 	return pci_set_power_state(pdev, PCI_D3hot);
 }
 
-static int brcms_suspend(struct bcma_device *pdev, pm_message_t state)
+static int brcms_suspend(struct bcma_device *pdev)
 {
 	struct brcms_info *wl;
 	struct ieee80211_hw *hw;
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index f4b8346b1a33..83c209f39493 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -162,7 +162,7 @@ struct bcma_driver {
 
 	int (*probe)(struct bcma_device *dev);
 	void (*remove)(struct bcma_device *dev);
-	int (*suspend)(struct bcma_device *dev, pm_message_t state);
+	int (*suspend)(struct bcma_device *dev);
 	int (*resume)(struct bcma_device *dev);
 	void (*shutdown)(struct bcma_device *dev);
 
-- 
cgit v1.2.3


From 20c300b10c358daa507be335aec6aa3987ef425a Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 17 Jan 2012 12:54:01 +0400
Subject: tty: remove unused tty_driver->termios_locked

This field is unused since 2.6.28 (commit fe6e29fdb1a7: "tty: simplify
ktermios allocation", to be exact)

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tty_driver.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index ecdaeb98b293..5cf685086dd3 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -312,7 +312,6 @@ struct tty_driver {
 	 */
 	struct tty_struct **ttys;
 	struct ktermios **termios;
-	struct ktermios **termios_locked;
 	void *driver_state;
 
 	/*
-- 
cgit v1.2.3


From 85e7bac33b8d5edafc4e219c7dfdb3d48e0b4e31 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:05 -0500
Subject: seccomp: audit abnormal end to a process due to seccomp

The audit system likes to collect information about processes that end
abnormally (SIGSEGV) as this may be useful intrusion detection information.
This patch adds audit support to collect information when seccomp forces a
task to exit because of misbehavior in a similar way.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  8 ++++++++
 kernel/auditsc.c      | 50 +++++++++++++++++++++++++++++---------------------
 kernel/seccomp.c      |  2 ++
 3 files changed, 39 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 426ab9f4dd85..6e1c533f9b46 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -430,6 +430,7 @@ extern void audit_putname(const char *name);
 extern void __audit_inode(const char *name, const struct dentry *dentry);
 extern void __audit_inode_child(const struct dentry *dentry,
 				const struct inode *parent);
+extern void __audit_seccomp(unsigned long syscall);
 extern void __audit_ptrace(struct task_struct *t);
 
 static inline int audit_dummy_context(void)
@@ -453,6 +454,12 @@ static inline void audit_inode_child(const struct dentry *dentry,
 }
 void audit_core_dumps(long signr);
 
+static inline void audit_seccomp(unsigned long syscall)
+{
+	if (unlikely(!audit_dummy_context()))
+		__audit_seccomp(syscall);
+}
+
 static inline void audit_ptrace(struct task_struct *t)
 {
 	if (unlikely(!audit_dummy_context()))
@@ -558,6 +565,7 @@ extern int audit_signals;
 #define audit_inode(n,d) do { (void)(d); } while (0)
 #define audit_inode_child(i,p) do { ; } while (0)
 #define audit_core_dumps(i) do { ; } while (0)
+#define audit_seccomp(i) do { ; } while (0)
 #define auditsc_get_stamp(c,t,s) (0)
 #define audit_get_loginuid(t) (-1)
 #define audit_get_sessionid(t) (-1)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7c495147c3d9..e9bcb93800d8 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2529,6 +2529,25 @@ void __audit_mmap_fd(int fd, int flags)
 	context->type = AUDIT_MMAP;
 }
 
+static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
+{
+	uid_t auid, uid;
+	gid_t gid;
+	unsigned int sessionid;
+
+	auid = audit_get_loginuid(current);
+	sessionid = audit_get_sessionid(current);
+	current_uid_gid(&uid, &gid);
+
+	audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u",
+			 auid, uid, gid, sessionid);
+	audit_log_task_context(ab);
+	audit_log_format(ab, " pid=%d comm=", current->pid);
+	audit_log_untrustedstring(ab, current->comm);
+	audit_log_format(ab, " reason=");
+	audit_log_string(ab, reason);
+	audit_log_format(ab, " sig=%ld", signr);
+}
 /**
  * audit_core_dumps - record information about processes that end abnormally
  * @signr: signal value
@@ -2539,10 +2558,6 @@ void __audit_mmap_fd(int fd, int flags)
 void audit_core_dumps(long signr)
 {
 	struct audit_buffer *ab;
-	u32 sid;
-	uid_t auid = audit_get_loginuid(current), uid;
-	gid_t gid;
-	unsigned int sessionid = audit_get_sessionid(current);
 
 	if (!audit_enabled)
 		return;
@@ -2551,24 +2566,17 @@ void audit_core_dumps(long signr)
 		return;
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
-	current_uid_gid(&uid, &gid);
-	audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u",
-			 auid, uid, gid, sessionid);
-	security_task_getsecid(current, &sid);
-	if (sid) {
-		char *ctx = NULL;
-		u32 len;
+	audit_log_abend(ab, "memory violation", signr);
+	audit_log_end(ab);
+}
 
-		if (security_secid_to_secctx(sid, &ctx, &len))
-			audit_log_format(ab, " ssid=%u", sid);
-		else {
-			audit_log_format(ab, " subj=%s", ctx);
-			security_release_secctx(ctx, len);
-		}
-	}
-	audit_log_format(ab, " pid=%d comm=", current->pid);
-	audit_log_untrustedstring(ab, current->comm);
-	audit_log_format(ab, " sig=%ld", signr);
+void __audit_seccomp(unsigned long syscall)
+{
+	struct audit_buffer *ab;
+
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
+	audit_log_abend(ab, "seccomp", SIGKILL);
+	audit_log_format(ab, " syscall=%ld", syscall);
 	audit_log_end(ab);
 }
 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 57d4b13b631d..e8d76c5895ea 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -6,6 +6,7 @@
  * This defines a simple but solid secure-computing mode.
  */
 
+#include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/sched.h>
 #include <linux/compat.h>
@@ -54,6 +55,7 @@ void __secure_computing(int this_syscall)
 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
+	audit_seccomp(this_syscall);
 	do_exit(SIGKILL);
 }
 
-- 
cgit v1.2.3


From d7e7528bcd456f5c36ad4a202ccfb43c5aa98bc4 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:06 -0500
Subject: Audit: push audit success and retcode into arch ptrace.h

The audit system previously expected arches calling audit_syscall_exit to
supply as arguments whether the syscall succeeded and what the return code was.
Audit also provides a helper AUDITSC_RESULT which was supposed to simplify things
by converting from negative retcodes to an audit internal magic value stating
success or failure.  This helper was wrong and could indicate that a valid
pointer returned to userspace was a failed syscall.  The fix is to fix the
layering foolishness.  We now pass audit_syscall_exit a struct pt_regs and it
in turn calls back into arch code to collect the return value and to
determine if the syscall was a success or failure.  We also define a generic
is_syscall_success() macro which determines success/failure based on if the
value is < -MAX_ERRNO.  This works for arches like x86 which do not use a
separate mechanism to indicate syscall failure.

We make both the is_syscall_success() and regs_return_value() static inlines
instead of macros.  The reason is because the audit function must take a void*
for the regs.  (uml calls theirs struct uml_pt_regs instead of just struct
pt_regs so audit_syscall_exit can't take a struct pt_regs).  Since the audit
function takes a void* we need to use static inlines to cast it back to the
arch correct structure to dereference it.

The other major change is that on some arches, like ia64, MIPS and ppc, we
change regs_return_value() to give us the negative value on syscall failure.
The only other user of this macro, kretprobe_example.c, won't notice and it
makes the value signed consistently for the audit functions across all archs.

In arch/sh/kernel/ptrace_64.c I see that we were using regs[9] in the old
audit code as the return value.  But the ptrace_64.h code defined the macro
regs_return_value() as regs[3].  I have no idea which one is correct, but this
patch now uses the regs_return_value() function, so it now uses regs[3].

For powerpc we previously used regs->result but now use the
regs_return_value() function which uses regs->gpr[3].  regs->gpr[3] is
always positive so regs_return_value(), much like on ia64, makes it negative
before calling the audit code when appropriate.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: H. Peter Anvin <hpa@zytor.com> [for x86 portion]
Acked-by: Tony Luck <tony.luck@intel.com> [for ia64]
Acked-by: Richard Weinberger <richard@nod.at> [for uml]
Acked-by: David S. Miller <davem@davemloft.net> [for sparc]
Acked-by: Ralf Baechle <ralf@linux-mips.org> [for mips]
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> [for ppc]
---
 arch/ia64/include/asm/ptrace.h       | 13 ++++++++++++-
 arch/ia64/kernel/ptrace.c            |  9 +--------
 arch/microblaze/include/asm/ptrace.h |  5 +++++
 arch/microblaze/kernel/ptrace.c      |  3 +--
 arch/mips/include/asm/ptrace.h       | 14 +++++++++++++-
 arch/mips/kernel/ptrace.c            |  4 +---
 arch/powerpc/include/asm/ptrace.h    | 13 ++++++++++++-
 arch/powerpc/kernel/ptrace.c         |  4 +---
 arch/s390/include/asm/ptrace.h       |  6 +++++-
 arch/s390/kernel/ptrace.c            |  4 +---
 arch/sh/include/asm/ptrace_32.h      |  5 ++++-
 arch/sh/include/asm/ptrace_64.h      |  5 ++++-
 arch/sh/kernel/ptrace_32.c           |  4 +---
 arch/sh/kernel/ptrace_64.c           |  4 +---
 arch/sparc/include/asm/ptrace.h      | 10 +++++++++-
 arch/sparc/kernel/ptrace_64.c        | 11 +----------
 arch/um/kernel/ptrace.c              |  4 ++--
 arch/x86/ia32/ia32entry.S            | 10 +++++-----
 arch/x86/kernel/entry_32.S           |  8 ++++----
 arch/x86/kernel/entry_64.S           | 10 +++++-----
 arch/x86/kernel/ptrace.c             |  3 +--
 arch/x86/kernel/vm86_32.c            |  4 ++--
 arch/x86/um/shared/sysdep/ptrace.h   |  5 +++++
 include/linux/audit.h                | 22 ++++++++++++++--------
 include/linux/ptrace.h               | 10 ++++++++++
 kernel/auditsc.c                     | 16 ++++++++++++----
 26 files changed, 132 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h
index f5cb27614e35..68c98f5b3ca6 100644
--- a/arch/ia64/include/asm/ptrace.h
+++ b/arch/ia64/include/asm/ptrace.h
@@ -246,7 +246,18 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
 	return regs->ar_bspstore;
 }
 
-#define regs_return_value(regs) ((regs)->r8)
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	return regs->r10 != -1;
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	if (is_syscall_success(regs))
+		return regs->r8;
+	else
+		return -regs->r8;
+}
 
 /* Conserve space in histogram by encoding slot bits in address
  * bits 2 and 3 rather than bits 0 and 1.
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 8848f43d819e..2c154088cce7 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1268,14 +1268,7 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
 {
 	int step;
 
-	if (unlikely(current->audit_context)) {
-		int success = AUDITSC_RESULT(regs.r10);
-		long result = regs.r8;
-
-		if (success != AUDITSC_SUCCESS)
-			result = -result;
-		audit_syscall_exit(success, result);
-	}
+	audit_syscall_exit(&regs);
 
 	step = test_thread_flag(TIF_SINGLESTEP);
 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
diff --git a/arch/microblaze/include/asm/ptrace.h b/arch/microblaze/include/asm/ptrace.h
index 816bee64b196..94e92c805859 100644
--- a/arch/microblaze/include/asm/ptrace.h
+++ b/arch/microblaze/include/asm/ptrace.h
@@ -61,6 +61,11 @@ struct pt_regs {
 #define instruction_pointer(regs)	((regs)->pc)
 #define profile_pc(regs)		instruction_pointer(regs)
 
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->r3;
+}
+
 #else /* __KERNEL__ */
 
 /* pt_regs offsets used by gdbserver etc in ptrace syscalls */
diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c
index 043cb58f9c44..f564b1bfd386 100644
--- a/arch/microblaze/kernel/ptrace.c
+++ b/arch/microblaze/kernel/ptrace.c
@@ -159,8 +159,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
 {
 	int step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->r3), regs->r3);
+	audit_syscall_exit(regs);
 
 	step = test_thread_flag(TIF_SINGLESTEP);
 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index de39b1f343ea..7d409505df2d 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -137,7 +137,19 @@ extern int ptrace_set_watch_regs(struct task_struct *child,
  */
 #define user_mode(regs) (((regs)->cp0_status & KU_MASK) == KU_USER)
 
-#define regs_return_value(_regs) ((_regs)->regs[2])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	return !regs->regs[7];
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	if (is_syscall_success(regs))
+		return regs->regs[2];
+	else
+		return -regs->regs[2];
+}
+
 #define instruction_pointer(regs) ((regs)->cp0_epc)
 #define profile_pc(regs) instruction_pointer(regs)
 
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 4e6ea1ffad46..ab0f1963a7bd 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -572,9 +572,7 @@ out:
  */
 asmlinkage void syscall_trace_leave(struct pt_regs *regs)
 {
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]),
-		                   -regs->regs[2]);
+	audit_syscall_exit(regs);
 
 	if (!(current->ptrace & PT_PTRACED))
 		return;
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 48223f9b8728..78a205162fd7 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -86,7 +86,18 @@ struct pt_regs {
 #define instruction_pointer(regs) ((regs)->nip)
 #define user_stack_pointer(regs) ((regs)->gpr[1])
 #define kernel_stack_pointer(regs) ((regs)->gpr[1])
-#define regs_return_value(regs) ((regs)->gpr[3])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	return !(regs->ccr & 0x10000000);
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	if (is_syscall_success(regs))
+		return regs->gpr[3];
+	else
+		return -regs->gpr[3];
+}
 
 #ifdef CONFIG_SMP
 extern unsigned long profile_pc(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 5de73dbd15c7..09d31c12a5e3 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1748,9 +1748,7 @@ void do_syscall_trace_leave(struct pt_regs *regs)
 {
 	int step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit((regs->ccr&0x10000000)?AUDITSC_FAILURE:AUDITSC_SUCCESS,
-				   regs->result);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->result);
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 56da355678f4..aeb77f017985 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -541,9 +541,13 @@ struct user_regs_struct
 #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
 #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
 #define user_stack_pointer(regs)((regs)->gprs[15])
-#define regs_return_value(regs)((regs)->gprs[2])
 #define profile_pc(regs) instruction_pointer(regs)
 
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->gprs[2];
+}
+
 int regs_query_register_offset(const char *name);
 const char *regs_query_register_name(unsigned int offset);
 unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 573bc29551ef..f52758600980 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -751,9 +751,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 
 asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
 {
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]),
-				   regs->gprs[2]);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->gprs[2]);
diff --git a/arch/sh/include/asm/ptrace_32.h b/arch/sh/include/asm/ptrace_32.h
index 6c2239cca1a2..2d3e906aa722 100644
--- a/arch/sh/include/asm/ptrace_32.h
+++ b/arch/sh/include/asm/ptrace_32.h
@@ -76,7 +76,10 @@ struct pt_dspregs {
 #ifdef __KERNEL__
 
 #define MAX_REG_OFFSET		offsetof(struct pt_regs, tra)
-#define regs_return_value(_regs)	((_regs)->regs[0])
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->regs[0];
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sh/include/asm/ptrace_64.h b/arch/sh/include/asm/ptrace_64.h
index bf9be7764d69..eb3fcceaf64b 100644
--- a/arch/sh/include/asm/ptrace_64.h
+++ b/arch/sh/include/asm/ptrace_64.h
@@ -13,7 +13,10 @@ struct pt_regs {
 #ifdef __KERNEL__
 
 #define MAX_REG_OFFSET		offsetof(struct pt_regs, tregs[7])
-#define regs_return_value(_regs)	((_regs)->regs[3])
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->regs[3];
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 92b3c276339a..c0b5c179d27b 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -530,9 +530,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
 {
 	int step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->regs[0]),
-				   regs->regs[0]);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->regs[0]);
diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index c8f97649f354..ba720d686435 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -548,9 +548,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
 {
 	int step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->regs[9]),
-				   regs->regs[9]);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->regs[9]);
diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h
index a0e1bcf843a1..c00c3b5c2806 100644
--- a/arch/sparc/include/asm/ptrace.h
+++ b/arch/sparc/include/asm/ptrace.h
@@ -207,7 +207,15 @@ do {	current_thread_info()->syscall_noerror = 1; \
 #define instruction_pointer(regs) ((regs)->tpc)
 #define instruction_pointer_set(regs, val) ((regs)->tpc = (val))
 #define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
-#define regs_return_value(regs) ((regs)->u_regs[UREG_I0])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	return !(regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY));
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->u_regs[UREG_I0];
+}
 #ifdef CONFIG_SMP
 extern unsigned long profile_pc(struct pt_regs *);
 #else
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index 96ee50a80661..c73c8c50f117 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -1086,17 +1086,8 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 
 asmlinkage void syscall_trace_leave(struct pt_regs *regs)
 {
-#ifdef CONFIG_AUDITSYSCALL
-	if (unlikely(current->audit_context)) {
-		unsigned long tstate = regs->tstate;
-		int result = AUDITSC_SUCCESS;
+	audit_syscall_exit(regs);
 
-		if (unlikely(tstate & (TSTATE_XCARRY | TSTATE_ICARRY)))
-			result = AUDITSC_FAILURE;
-
-		audit_syscall_exit(result, regs->u_regs[UREG_I0]);
-	}
-#endif
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->u_regs[UREG_G1]);
 
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index c9da32b0c707..2ccf25c42feb 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -175,8 +175,8 @@ void syscall_trace(struct uml_pt_regs *regs, int entryexit)
 					    UPT_SYSCALL_ARG2(regs),
 					    UPT_SYSCALL_ARG3(regs),
 					    UPT_SYSCALL_ARG4(regs));
-		else audit_syscall_exit(AUDITSC_RESULT(UPT_SYSCALL_RET(regs)),
-					UPT_SYSCALL_RET(regs));
+		else
+			audit_syscall_exit(regs);
 	}
 
 	/* Fake a debug trap */
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 3e274564f6bf..64ced0b8f8fd 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -14,6 +14,7 @@
 #include <asm/segment.h>
 #include <asm/irqflags.h>
 #include <linux/linkage.h>
+#include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -208,12 +209,11 @@ sysexit_from_sys_call:
 	TRACE_IRQS_ON
 	sti
 	movl %eax,%esi		/* second arg, syscall return value */
-	cmpl $0,%eax		/* is it < 0? */
-	setl %al		/* 1 if so, 0 if not */
+	cmpl $-MAX_ERRNO,%eax	/* is it an error ? */
+	setbe %al		/* 1 if so, 0 if not */
 	movzbl %al,%edi		/* zero-extend that into %edi */
-	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-	call audit_syscall_exit
-	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall return value */
+	call __audit_syscall_exit
+	movq RAX-ARGOFFSET(%rsp),%rax	/* reload syscall return value */
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	cli
 	TRACE_IRQS_OFF
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 22d0e21b4dd7..a22facf06f0e 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -42,6 +42,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/err.h>
 #include <asm/thread_info.h>
 #include <asm/irqflags.h>
 #include <asm/errno.h>
@@ -466,11 +467,10 @@ sysexit_audit:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_ANY)
 	movl %eax,%edx		/* second arg, syscall return value */
-	cmpl $0,%eax		/* is it < 0? */
-	setl %al		/* 1 if so, 0 if not */
+	cmpl $-MAX_ERRNO,%eax	/* is it an error ? */
+	setbe %al		/* 1 if so, 0 if not */
 	movzbl %al,%eax		/* zero-extend that */
-	inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-	call audit_syscall_exit
+	call __audit_syscall_exit
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a20e1cb9dc87..e51393dd93a3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -55,6 +55,7 @@
 #include <asm/paravirt.h>
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
+#include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -563,17 +564,16 @@ auditsys:
 	jmp system_call_fastpath
 
 	/*
-	 * Return fast path for syscall audit.  Call audit_syscall_exit()
+	 * Return fast path for syscall audit.  Call __audit_syscall_exit()
 	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
 	 * masked off.
 	 */
 sysret_audit:
 	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
-	cmpq $0,%rsi		/* is it < 0? */
-	setl %al		/* 1 if so, 0 if not */
+	cmpq $-MAX_ERRNO,%rsi	/* is it < -MAX_ERRNO? */
+	setbe %al		/* 1 if so, 0 if not */
 	movzbl %al,%edi		/* zero-extend that into %edi */
-	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-	call audit_syscall_exit
+	call __audit_syscall_exit
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	jmp sysret_check
 #endif	/* CONFIG_AUDITSYSCALL */
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 89a04c7b5bb6..8b0218758775 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1414,8 +1414,7 @@ void syscall_trace_leave(struct pt_regs *regs)
 {
 	bool step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->ax);
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 863f8753ab0a..af17e1c966dc 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -335,9 +335,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 	if (info->flags & VM86_SCREEN_BITMAP)
 		mark_screen_rdonly(tsk->mm);
 
-	/*call audit_syscall_exit since we do not exit via the normal paths */
+	/*call __audit_syscall_exit since we do not exit via the normal paths */
 	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(0), 0);
+		__audit_syscall_exit(1, 0);
 
 	__asm__ __volatile__(
 		"movl %0,%%esp\n\t"
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
index 711b1621747f..5ef9344a8b24 100644
--- a/arch/x86/um/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -3,3 +3,8 @@
 #else
 #include "ptrace_64.h"
 #endif
+
+static inline long regs_return_value(struct uml_pt_regs *regs)
+{
+	return UPT_SYSCALL_RET(regs);
+}
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 6e1c533f9b46..3d65e4b3ba06 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -26,6 +26,7 @@
 
 #include <linux/types.h>
 #include <linux/elf-em.h>
+#include <linux/ptrace.h>
 
 /* The netlink messages for the audit system is divided into blocks:
  * 1000 - 1099 are for commanding the audit system
@@ -408,10 +409,6 @@ struct audit_field {
 	void				*lsm_rule;
 };
 
-#define AUDITSC_INVALID 0
-#define AUDITSC_SUCCESS 1
-#define AUDITSC_FAILURE 2
-#define AUDITSC_RESULT(x) ( ((long)(x))<0?AUDITSC_FAILURE:AUDITSC_SUCCESS )
 extern int __init audit_register_class(int class, unsigned *list);
 extern int audit_classify_syscall(int abi, unsigned syscall);
 extern int audit_classify_arch(int arch);
@@ -424,7 +421,7 @@ extern void audit_free(struct task_struct *task);
 extern void audit_syscall_entry(int arch,
 				int major, unsigned long a0, unsigned long a1,
 				unsigned long a2, unsigned long a3);
-extern void audit_syscall_exit(int failed, long return_code);
+extern void __audit_syscall_exit(int ret_success, long ret_value);
 extern void __audit_getname(const char *name);
 extern void audit_putname(const char *name);
 extern void __audit_inode(const char *name, const struct dentry *dentry);
@@ -438,6 +435,15 @@ static inline int audit_dummy_context(void)
 	void *p = current->audit_context;
 	return !p || *(int *)p;
 }
+static inline void audit_syscall_exit(void *pt_regs)
+{
+	if (unlikely(current->audit_context)) {
+		int success = is_syscall_success(pt_regs);
+		int return_code = regs_return_value(pt_regs);
+
+		__audit_syscall_exit(success, return_code);
+	}
+}
 static inline void audit_getname(const char *name)
 {
 	if (unlikely(!audit_dummy_context()))
@@ -551,12 +557,12 @@ static inline void audit_mmap_fd(int fd, int flags)
 
 extern int audit_n_rules;
 extern int audit_signals;
-#else
+#else /* CONFIG_AUDITSYSCALL */
 #define audit_finish_fork(t)
 #define audit_alloc(t) ({ 0; })
 #define audit_free(t) do { ; } while (0)
 #define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0)
-#define audit_syscall_exit(f,r) do { ; } while (0)
+#define audit_syscall_exit(r) do { ; } while (0)
 #define audit_dummy_context() 1
 #define audit_getname(n) do { ; } while (0)
 #define audit_putname(n) do { ; } while (0)
@@ -587,7 +593,7 @@ extern int audit_signals;
 #define audit_ptrace(t) ((void)0)
 #define audit_n_rules 0
 #define audit_signals 0
-#endif
+#endif /* CONFIG_AUDITSYSCALL */
 
 #ifdef CONFIG_AUDIT
 /* These are defined in audit.c */
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 800f113bea66..dd4cefa6519d 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -112,6 +112,7 @@
 
 #include <linux/compiler.h>		/* For unlikely.  */
 #include <linux/sched.h>		/* For struct task_struct.  */
+#include <linux/err.h>			/* for IS_ERR_VALUE */
 
 
 extern long arch_ptrace(struct task_struct *child, long request,
@@ -265,6 +266,15 @@ static inline void ptrace_release_task(struct task_struct *task)
 #define force_successful_syscall_return() do { } while (0)
 #endif
 
+#ifndef is_syscall_success
+/*
+ * On most systems we can tell if a syscall is a success based on if the retval
+ * is an error value.  On some systems like ia64 and powerpc they have different
+ * indicators of success/failure and must define their own.
+ */
+#define is_syscall_success(regs) (!IS_ERR_VALUE((unsigned long)(regs_return_value(regs))))
+#endif
+
 /*
  * <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__.
  *
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e9bcb93800d8..3d2853808185 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -70,6 +70,11 @@
 
 #include "audit.h"
 
+/* flags stating the success for a syscall */
+#define AUDITSC_INVALID 0
+#define AUDITSC_SUCCESS 1
+#define AUDITSC_FAILURE 2
+
 /* AUDIT_NAMES is the number of slots we reserve in the audit_context
  * for saving names from getname().  If we get more names we will allocate
  * a name dynamically and also add those to the list anchored by names_list. */
@@ -1724,8 +1729,7 @@ void audit_finish_fork(struct task_struct *child)
 
 /**
  * audit_syscall_exit - deallocate audit context after a system call
- * @valid: success/failure flag
- * @return_code: syscall return value
+ * @pt_regs: syscall registers
  *
  * Tear down after system call.  If the audit context has been marked as
  * auditable (either because of the AUDIT_RECORD_CONTEXT state from
@@ -1733,13 +1737,17 @@ void audit_finish_fork(struct task_struct *child)
  * message), then write out the syscall information.  In call cases,
  * free the names stored from getname().
  */
-void audit_syscall_exit(int valid, long return_code)
+void __audit_syscall_exit(int success, long return_code)
 {
 	struct task_struct *tsk = current;
 	struct audit_context *context;
 
-	context = audit_get_context(tsk, valid, return_code);
+	if (success)
+		success = AUDITSC_SUCCESS;
+	else
+		success = AUDITSC_FAILURE;
 
+	context = audit_get_context(tsk, success, return_code);
 	if (likely(!context))
 		return;
 
-- 
cgit v1.2.3


From b05d8447e7821695bc2fa3359431f7a664232743 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:06 -0500
Subject: audit: inline audit_syscall_entry to reduce burden on archs

Every arch calls:

if (unlikely(current->audit_context))
	audit_syscall_entry()

which requires knowledge about audit (the existence of audit_context) in
the arch code.  Just do it all in static inline in audit.h so that arches
can remain blissfully ignorant.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 arch/ia64/kernel/ptrace.c       |  9 +--------
 arch/microblaze/kernel/ptrace.c |  6 ++----
 arch/mips/kernel/ptrace.c       |  7 +++----
 arch/powerpc/kernel/ptrace.c    | 26 ++++++++++++--------------
 arch/s390/kernel/ptrace.c       | 11 +++++------
 arch/sh/kernel/ptrace_32.c      |  7 +++----
 arch/sh/kernel/ptrace_64.c      |  7 +++----
 arch/sparc/kernel/ptrace_64.c   | 17 ++++++++---------
 arch/um/kernel/ptrace.c         | 20 +++++++++-----------
 arch/x86/ia32/ia32entry.S       |  2 +-
 arch/x86/kernel/entry_32.S      |  2 +-
 arch/x86/kernel/entry_64.S      |  4 ++--
 arch/x86/kernel/ptrace.c        | 22 ++++++++++------------
 arch/xtensa/kernel/ptrace.c     |  3 +--
 include/linux/audit.h           | 13 ++++++++++---
 kernel/auditsc.c                |  2 +-
 16 files changed, 72 insertions(+), 86 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 2c154088cce7..dad91661ddf9 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1246,15 +1246,8 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
 	if (test_thread_flag(TIF_RESTORE_RSE))
 		ia64_sync_krbs();
 
-	if (unlikely(current->audit_context)) {
-		long syscall;
-		int arch;
 
-		syscall = regs.r15;
-		arch = AUDIT_ARCH_IA64;
-
-		audit_syscall_entry(arch, syscall, arg0, arg1, arg2, arg3);
-	}
+	audit_syscall_entry(AUDIT_ARCH_IA64, regs.r15, arg0, arg1, arg2, arg3);
 
 	return 0;
 }
diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c
index f564b1bfd386..6eb2aa927d89 100644
--- a/arch/microblaze/kernel/ptrace.c
+++ b/arch/microblaze/kernel/ptrace.c
@@ -147,10 +147,8 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 		 */
 		ret = -1L;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(EM_MICROBLAZE, regs->r12,
-				    regs->r5, regs->r6,
-				    regs->r7, regs->r8);
+	audit_syscall_entry(EM_MICROBLAZE, regs->r12, regs->r5, regs->r6,
+			    regs->r7, regs->r8);
 
 	return ret ?: regs->r12;
 }
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index ab0f1963a7bd..7786b608d932 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -560,10 +560,9 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs)
 	}
 
 out:
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(audit_arch(), regs->regs[2],
-				    regs->regs[4], regs->regs[5],
-				    regs->regs[6], regs->regs[7]);
+	audit_syscall_entry(audit_arch(), regs->regs[2],
+			    regs->regs[4], regs->regs[5],
+			    regs->regs[6], regs->regs[7]);
 }
 
 /*
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 09d31c12a5e3..5b43325402bc 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1724,22 +1724,20 @@ long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->gpr[0]);
 
-	if (unlikely(current->audit_context)) {
 #ifdef CONFIG_PPC64
-		if (!is_32bit_task())
-			audit_syscall_entry(AUDIT_ARCH_PPC64,
-					    regs->gpr[0],
-					    regs->gpr[3], regs->gpr[4],
-					    regs->gpr[5], regs->gpr[6]);
-		else
+	if (!is_32bit_task())
+		audit_syscall_entry(AUDIT_ARCH_PPC64,
+				    regs->gpr[0],
+				    regs->gpr[3], regs->gpr[4],
+				    regs->gpr[5], regs->gpr[6]);
+	else
 #endif
-			audit_syscall_entry(AUDIT_ARCH_PPC,
-					    regs->gpr[0],
-					    regs->gpr[3] & 0xffffffff,
-					    regs->gpr[4] & 0xffffffff,
-					    regs->gpr[5] & 0xffffffff,
-					    regs->gpr[6] & 0xffffffff);
-	}
+		audit_syscall_entry(AUDIT_ARCH_PPC,
+				    regs->gpr[0],
+				    regs->gpr[3] & 0xffffffff,
+				    regs->gpr[4] & 0xffffffff,
+				    regs->gpr[5] & 0xffffffff,
+				    regs->gpr[6] & 0xffffffff);
 
 	return ret ?: regs->gpr[0];
 }
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index f52758600980..9d82ed4bcb27 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -740,12 +740,11 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->gprs[2]);
 
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(is_compat_task() ?
-					AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
-				    regs->gprs[2], regs->orig_gpr2,
-				    regs->gprs[3], regs->gprs[4],
-				    regs->gprs[5]);
+	audit_syscall_entry(is_compat_task() ?
+				AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
+			    regs->gprs[2], regs->orig_gpr2,
+			    regs->gprs[3], regs->gprs[4],
+			    regs->gprs[5]);
 	return ret ?: regs->gprs[2];
 }
 
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index c0b5c179d27b..a3e651563763 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -518,10 +518,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->regs[0]);
 
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(audit_arch(), regs->regs[3],
-				    regs->regs[4], regs->regs[5],
-				    regs->regs[6], regs->regs[7]);
+	audit_syscall_entry(audit_arch(), regs->regs[3],
+			    regs->regs[4], regs->regs[5],
+			    regs->regs[6], regs->regs[7]);
 
 	return ret ?: regs->regs[0];
 }
diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index ba720d686435..3d0080b5c976 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -536,10 +536,9 @@ asmlinkage long long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->regs[9]);
 
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(audit_arch(), regs->regs[1],
-				    regs->regs[2], regs->regs[3],
-				    regs->regs[4], regs->regs[5]);
+	audit_syscall_entry(audit_arch(), regs->regs[1],
+			    regs->regs[2], regs->regs[3],
+			    regs->regs[4], regs->regs[5]);
 
 	return ret ?: regs->regs[9];
 }
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index c73c8c50f117..9388844cd88c 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -1071,15 +1071,14 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->u_regs[UREG_G1]);
 
-	if (unlikely(current->audit_context) && !ret)
-		audit_syscall_entry((test_thread_flag(TIF_32BIT) ?
-				     AUDIT_ARCH_SPARC :
-				     AUDIT_ARCH_SPARC64),
-				    regs->u_regs[UREG_G1],
-				    regs->u_regs[UREG_I0],
-				    regs->u_regs[UREG_I1],
-				    regs->u_regs[UREG_I2],
-				    regs->u_regs[UREG_I3]);
+	audit_syscall_entry((test_thread_flag(TIF_32BIT) ?
+			     AUDIT_ARCH_SPARC :
+			     AUDIT_ARCH_SPARC64),
+			    regs->u_regs[UREG_G1],
+			    regs->u_regs[UREG_I0],
+			    regs->u_regs[UREG_I1],
+			    regs->u_regs[UREG_I2],
+			    regs->u_regs[UREG_I3]);
 
 	return ret;
 }
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 2ccf25c42feb..06b190390505 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -167,17 +167,15 @@ void syscall_trace(struct uml_pt_regs *regs, int entryexit)
 	int is_singlestep = (current->ptrace & PT_DTRACE) && entryexit;
 	int tracesysgood;
 
-	if (unlikely(current->audit_context)) {
-		if (!entryexit)
-			audit_syscall_entry(HOST_AUDIT_ARCH,
-					    UPT_SYSCALL_NR(regs),
-					    UPT_SYSCALL_ARG1(regs),
-					    UPT_SYSCALL_ARG2(regs),
-					    UPT_SYSCALL_ARG3(regs),
-					    UPT_SYSCALL_ARG4(regs));
-		else
-			audit_syscall_exit(regs);
-	}
+	if (!entryexit)
+		audit_syscall_entry(HOST_AUDIT_ARCH,
+				    UPT_SYSCALL_NR(regs),
+				    UPT_SYSCALL_ARG1(regs),
+				    UPT_SYSCALL_ARG2(regs),
+				    UPT_SYSCALL_ARG3(regs),
+				    UPT_SYSCALL_ARG4(regs));
+	else
+		audit_syscall_exit(regs);
 
 	/* Fake a debug trap */
 	if (is_singlestep)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 025f0f01d254..cecfd9a8f734 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -192,7 +192,7 @@ sysexit_from_sys_call:
 	movl %ebx,%edx			/* 3rd arg: 1st syscall arg */
 	movl %eax,%esi			/* 2nd arg: syscall number */
 	movl $AUDIT_ARCH_I386,%edi	/* 1st arg: audit arch */
-	call audit_syscall_entry
+	call __audit_syscall_entry
 	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall number */
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index a22facf06f0e..1ccd742eba1b 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -456,7 +456,7 @@ sysenter_audit:
 	movl %ebx,%ecx			/* 3rd arg: 1st syscall arg */
 	movl %eax,%edx			/* 2nd arg: syscall number */
 	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
-	call audit_syscall_entry
+	call __audit_syscall_entry
 	pushl_cfi %ebx
 	movl PT_EAX(%esp),%eax		/* reload syscall number */
 	jmp sysenter_do_call
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e51393dd93a3..1ca66b650123 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -549,7 +549,7 @@ badsys:
 #ifdef CONFIG_AUDITSYSCALL
 	/*
 	 * Fast path for syscall audit without full syscall trace.
-	 * We just call audit_syscall_entry() directly, and then
+	 * We just call __audit_syscall_entry() directly, and then
 	 * jump back to the normal fast path.
 	 */
 auditsys:
@@ -559,7 +559,7 @@ auditsys:
 	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
 	movq %rax,%rsi			/* 2nd arg: syscall number */
 	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
-	call audit_syscall_entry
+	call __audit_syscall_entry
 	LOAD_ARGS 0		/* reload call-clobbered registers */
 	jmp system_call_fastpath
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 8b0218758775..50267386b766 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1392,20 +1392,18 @@ long syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->orig_ax);
 
-	if (unlikely(current->audit_context)) {
-		if (IS_IA32)
-			audit_syscall_entry(AUDIT_ARCH_I386,
-					    regs->orig_ax,
-					    regs->bx, regs->cx,
-					    regs->dx, regs->si);
+	if (IS_IA32)
+		audit_syscall_entry(AUDIT_ARCH_I386,
+				    regs->orig_ax,
+				    regs->bx, regs->cx,
+				    regs->dx, regs->si);
 #ifdef CONFIG_X86_64
-		else
-			audit_syscall_entry(AUDIT_ARCH_X86_64,
-					    regs->orig_ax,
-					    regs->di, regs->si,
-					    regs->dx, regs->r10);
+	else
+		audit_syscall_entry(AUDIT_ARCH_X86_64,
+				    regs->orig_ax,
+				    regs->di, regs->si,
+				    regs->dx, regs->r10);
 #endif
-	}
 
 	return ret ?: regs->orig_ax;
 }
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index a0d042aa2967..2dff698ab02e 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -334,8 +334,7 @@ void do_syscall_trace_enter(struct pt_regs *regs)
 		do_syscall_trace();
 
 #if 0
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(current, AUDIT_ARCH_XTENSA..);
+	audit_syscall_entry(current, AUDIT_ARCH_XTENSA..);
 #endif
 }
 
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 3d65e4b3ba06..f56ce2669b83 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -418,9 +418,9 @@ extern int audit_classify_arch(int arch);
 extern void audit_finish_fork(struct task_struct *child);
 extern int  audit_alloc(struct task_struct *task);
 extern void audit_free(struct task_struct *task);
-extern void audit_syscall_entry(int arch,
-				int major, unsigned long a0, unsigned long a1,
-				unsigned long a2, unsigned long a3);
+extern void __audit_syscall_entry(int arch,
+				  int major, unsigned long a0, unsigned long a1,
+				  unsigned long a2, unsigned long a3);
 extern void __audit_syscall_exit(int ret_success, long ret_value);
 extern void __audit_getname(const char *name);
 extern void audit_putname(const char *name);
@@ -435,6 +435,13 @@ static inline int audit_dummy_context(void)
 	void *p = current->audit_context;
 	return !p || *(int *)p;
 }
+static inline void audit_syscall_entry(int arch, int major, unsigned long a0,
+				       unsigned long a1, unsigned long a2,
+				       unsigned long a3)
+{
+	if (unlikely(current->audit_context))
+		__audit_syscall_entry(arch, major, a0, a1, a2, a3);
+}
 static inline void audit_syscall_exit(void *pt_regs)
 {
 	if (unlikely(current->audit_context)) {
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3d2853808185..b408100dd6ef 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1632,7 +1632,7 @@ void audit_free(struct task_struct *tsk)
  * will only be written if another part of the kernel requests that it
  * be written).
  */
-void audit_syscall_entry(int arch, int major,
+void __audit_syscall_entry(int arch, int major,
 			 unsigned long a1, unsigned long a2,
 			 unsigned long a3, unsigned long a4)
 {
-- 
cgit v1.2.3


From 07c49417877f8658a6aa0ad9b4e21e4fd4df11b6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: inline checks for not needing to collect aux records

A number of audit hooks make function calls before they determine that
auxiliary records do not need to be collected.  Do those checks as static
inlines since the most common case is going to be that records are not
needed and we can skip the function call overhead.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 23 ++++++++++++++++++++---
 kernel/auditsc.c      | 15 +++------------
 2 files changed, 23 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index f56ce2669b83..cf16faff6b8a 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -489,9 +489,9 @@ extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 extern void audit_log_task_context(struct audit_buffer *ab);
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
-extern int audit_bprm(struct linux_binprm *bprm);
-extern void audit_socketcall(int nargs, unsigned long *args);
-extern int audit_sockaddr(int len, void *addr);
+extern int __audit_bprm(struct linux_binprm *bprm);
+extern void __audit_socketcall(int nargs, unsigned long *args);
+extern int __audit_sockaddr(int len, void *addr);
 extern void __audit_fd_pair(int fd1, int fd2);
 extern int audit_set_macxattr(const char *name);
 extern void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr);
@@ -519,6 +519,23 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid
 	if (unlikely(!audit_dummy_context()))
 		__audit_ipc_set_perm(qbytes, uid, gid, mode);
 }
+static inline int audit_bprm(struct linux_binprm *bprm)
+{
+	if (unlikely(!audit_dummy_context()))
+		return __audit_bprm(bprm);
+	return 0;
+}
+static inline void audit_socketcall(int nargs, unsigned long *args)
+{
+	if (unlikely(!audit_dummy_context()))
+		__audit_socketcall(nargs, args);
+}
+static inline int audit_sockaddr(int len, void *addr)
+{
+	if (unlikely(!audit_dummy_context()))
+		return __audit_sockaddr(len, addr);
+	return 0;
+}
 static inline void audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr)
 {
 	if (unlikely(!audit_dummy_context()))
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index d7382c2aaa9e..e1062f66b01b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2309,14 +2309,11 @@ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mo
 	context->ipc.has_perm = 1;
 }
 
-int audit_bprm(struct linux_binprm *bprm)
+int __audit_bprm(struct linux_binprm *bprm)
 {
 	struct audit_aux_data_execve *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!audit_enabled || !context || context->dummy))
-		return 0;
-
 	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
 	if (!ax)
 		return -ENOMEM;
@@ -2337,13 +2334,10 @@ int audit_bprm(struct linux_binprm *bprm)
  * @args: args array
  *
  */
-void audit_socketcall(int nargs, unsigned long *args)
+void __audit_socketcall(int nargs, unsigned long *args)
 {
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context || context->dummy))
-		return;
-
 	context->type = AUDIT_SOCKETCALL;
 	context->socketcall.nargs = nargs;
 	memcpy(context->socketcall.args, args, nargs * sizeof(unsigned long));
@@ -2369,13 +2363,10 @@ void __audit_fd_pair(int fd1, int fd2)
  *
  * Returns 0 for success or NULL context or < 0 on error.
  */
-int audit_sockaddr(int len, void *a)
+int __audit_sockaddr(int len, void *a)
 {
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context || context->dummy))
-		return 0;
-
 	if (!context->sockaddr) {
 		void *p = kmalloc(sizeof(struct sockaddr_storage), GFP_KERNEL);
 		if (!p)
-- 
cgit v1.2.3


From 38cdce53daa0408a61fe6d86fe48f31515c9b840 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: drop audit_set_macxattr as it doesn't do anything

unused.  deleted.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index cf16faff6b8a..4f1efe3e8616 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -493,7 +493,6 @@ extern int __audit_bprm(struct linux_binprm *bprm);
 extern void __audit_socketcall(int nargs, unsigned long *args);
 extern int __audit_sockaddr(int len, void *addr);
 extern void __audit_fd_pair(int fd1, int fd2);
-extern int audit_set_macxattr(const char *name);
 extern void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr);
 extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout);
 extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification);
@@ -606,7 +605,6 @@ extern int audit_signals;
 #define audit_socketcall(n,a) ((void)0)
 #define audit_fd_pair(n,a) ((void)0)
 #define audit_sockaddr(len, addr) ({ 0; })
-#define audit_set_macxattr(n) do { ; } while (0)
 #define audit_mq_open(o,m,a) ((void)0)
 #define audit_mq_sendrecv(d,l,p,t) ((void)0)
 #define audit_mq_notify(d,n) ((void)0)
-- 
cgit v1.2.3


From a4ff8dba7d8ce5ceb43fb27df66292251cc73bdc Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: inline audit_free to simplify the look of generic code

make the conditional a static inline instead of doing it in generic code.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 7 ++++++-
 kernel/auditsc.c      | 2 +-
 kernel/exit.c         | 3 +--
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 4f1efe3e8616..8eb8bda749b3 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -417,7 +417,7 @@ extern int audit_classify_arch(int arch);
 				/* Public API */
 extern void audit_finish_fork(struct task_struct *child);
 extern int  audit_alloc(struct task_struct *task);
-extern void audit_free(struct task_struct *task);
+extern void __audit_free(struct task_struct *task);
 extern void __audit_syscall_entry(int arch,
 				  int major, unsigned long a0, unsigned long a1,
 				  unsigned long a2, unsigned long a3);
@@ -435,6 +435,11 @@ static inline int audit_dummy_context(void)
 	void *p = current->audit_context;
 	return !p || *(int *)p;
 }
+static inline void audit_free(struct task_struct *task)
+{
+	if (unlikely(task->audit_context))
+		__audit_free(task);
+}
 static inline void audit_syscall_entry(int arch, int major, unsigned long a0,
 				       unsigned long a1, unsigned long a2,
 				       unsigned long a3)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e1062f66b01b..7aaeb38b262a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1594,7 +1594,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
  *
  * Called from copy_process and do_exit
  */
-void audit_free(struct task_struct *tsk)
+void __audit_free(struct task_struct *tsk)
 {
 	struct audit_context *context;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 94ed6e20bb53..88dcbbc446f7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -964,8 +964,7 @@ NORET_TYPE void do_exit(long code)
 	acct_collect(code, group_dead);
 	if (group_dead)
 		tty_audit_exit();
-	if (unlikely(tsk->audit_context))
-		audit_free(tsk);
+	audit_free(tsk);
 
 	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
-- 
cgit v1.2.3


From 6422e78de6880c66a82af512d9bd0c85eb62e661 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: remove audit_finish_fork as it can't be called

Audit entry,always rules are not allowed and are automatically changed into
exit,always rules in userspace.  The kernel refuses to load such rules.

Thus a task in the middle of a syscall (and thus in audit_finish_fork())
can only be in one of two states: AUDIT_BUILD_CONTEXT or AUDIT_DISABLED.
Since the current task cannot be in AUDIT_RECORD_CONTEXT we aren't ever
going to actually use the code in audit_finish_fork() since it will
return without doing anything.  Thus drop the code.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  2 --
 kernel/auditsc.c      | 20 --------------------
 kernel/fork.c         |  2 --
 3 files changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 8eb8bda749b3..67b66c37a254 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -415,7 +415,6 @@ extern int audit_classify_arch(int arch);
 #ifdef CONFIG_AUDITSYSCALL
 /* These are defined in auditsc.c */
 				/* Public API */
-extern void audit_finish_fork(struct task_struct *child);
 extern int  audit_alloc(struct task_struct *task);
 extern void __audit_free(struct task_struct *task);
 extern void __audit_syscall_entry(int arch,
@@ -586,7 +585,6 @@ static inline void audit_mmap_fd(int fd, int flags)
 extern int audit_n_rules;
 extern int audit_signals;
 #else /* CONFIG_AUDITSYSCALL */
-#define audit_finish_fork(t)
 #define audit_alloc(t) ({ 0; })
 #define audit_free(t) do { ; } while (0)
 #define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7aaeb38b262a..4d8920f5ab88 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1707,26 +1707,6 @@ void __audit_syscall_entry(int arch, int major,
 	context->ppid       = 0;
 }
 
-void audit_finish_fork(struct task_struct *child)
-{
-	struct audit_context *ctx = current->audit_context;
-	struct audit_context *p = child->audit_context;
-	if (!p || !ctx)
-		return;
-	if (!ctx->in_syscall || ctx->current_state != AUDIT_RECORD_CONTEXT)
-		return;
-	p->arch = ctx->arch;
-	p->major = ctx->major;
-	memcpy(p->argv, ctx->argv, sizeof(ctx->argv));
-	p->ctime = ctx->ctime;
-	p->dummy = ctx->dummy;
-	p->in_syscall = ctx->in_syscall;
-	p->filterkey = kstrdup(ctx->filterkey, GFP_KERNEL);
-	p->ppid = current->pid;
-	p->prio = ctx->prio;
-	p->current_state = ctx->current_state;
-}
-
 /**
  * audit_syscall_exit - deallocate audit context after a system call
  * @pt_regs: syscall registers
diff --git a/kernel/fork.c b/kernel/fork.c
index 443f5125f11e..c1e5c21f48c1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1525,8 +1525,6 @@ long do_fork(unsigned long clone_flags,
 			init_completion(&vfork);
 		}
 
-		audit_finish_fork(p);
-
 		/*
 		 * We set PF_STARTING at creation in case tracing wants to
 		 * use this to distinguish a fully live task from one that
-- 
cgit v1.2.3


From efaffd6e4417860c67576ac760dd6e8bbd15f006 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: allow matching on obj_uid

Allow syscall exit filter matching based on the uid of the owner of an
inode used in a syscall.  aka:

auditctl -a always,exit -S open -F obj_uid=0 -F perm=wa

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  1 +
 kernel/auditfilter.c  |  1 +
 kernel/auditsc.c      | 12 ++++++++++++
 3 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 67b66c37a254..55cb3daaf474 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -223,6 +223,7 @@
 #define AUDIT_PERM	106
 #define AUDIT_DIR	107
 #define AUDIT_FILETYPE	108
+#define AUDIT_OBJ_UID	109
 
 #define AUDIT_ARG0      200
 #define AUDIT_ARG1      (AUDIT_ARG0+1)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 903caa269b5c..13e997423dcd 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -461,6 +461,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_ARG1:
 		case AUDIT_ARG2:
 		case AUDIT_ARG3:
+		case AUDIT_OBJ_UID:
 			break;
 		case AUDIT_ARCH:
 			entry->rule.arch_f = f;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4d8920f5ab88..5cf3ecc01517 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -586,6 +586,18 @@ static int audit_filter_rules(struct task_struct *tsk,
 				}
 			}
 			break;
+		case AUDIT_OBJ_UID:
+			if (name) {
+				result = audit_comparator(name->uid, f->op, f->val);
+			} else if (ctx) {
+				list_for_each_entry(n, &ctx->names_list, list) {
+					if (audit_comparator(n->uid, f->op, f->val)) {
+						++result;
+						break;
+					}
+				}
+			}
+			break;
 		case AUDIT_WATCH:
 			if (name)
 				result = audit_watch_compare(rule->watch, name->ino, name->dev);
-- 
cgit v1.2.3


From 54d3218b31aee5bc9c859ae60fbde933d922448b Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: allow audit matching on inode gid

Much like the ability to filter audit on the uid of an inode collected, we
should be able to filter on the gid of the inode.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  1 +
 kernel/auditfilter.c  |  1 +
 kernel/auditsc.c      | 12 ++++++++++++
 3 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 55cb3daaf474..e36aa37c88af 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -224,6 +224,7 @@
 #define AUDIT_DIR	107
 #define AUDIT_FILETYPE	108
 #define AUDIT_OBJ_UID	109
+#define AUDIT_OBJ_GID	110
 
 #define AUDIT_ARG0      200
 #define AUDIT_ARG1      (AUDIT_ARG0+1)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 13e997423dcd..f10605c787e6 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -462,6 +462,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_ARG2:
 		case AUDIT_ARG3:
 		case AUDIT_OBJ_UID:
+		case AUDIT_OBJ_GID:
 			break;
 		case AUDIT_ARCH:
 			entry->rule.arch_f = f;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 5cf3ecc01517..87b375fb12ff 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -598,6 +598,18 @@ static int audit_filter_rules(struct task_struct *tsk,
 				}
 			}
 			break;
+		case AUDIT_OBJ_GID:
+			if (name) {
+				result = audit_comparator(name->gid, f->op, f->val);
+			} else if (ctx) {
+				list_for_each_entry(n, &ctx->names_list, list) {
+					if (audit_comparator(n->gid, f->op, f->val)) {
+						++result;
+						break;
+					}
+				}
+			}
+			break;
 		case AUDIT_WATCH:
 			if (name)
 				result = audit_watch_compare(rule->watch, name->ino, name->dev);
-- 
cgit v1.2.3


From 0a300be6d5be8f66cd96609334710c268d0bfdce Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:08 -0500
Subject: audit: remove task argument to audit_set_loginuid

The function always deals with current.  Don't expose an option
pretending one can use it for something.  You can't.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/proc/base.c        | 2 +-
 include/linux/audit.h | 2 +-
 kernel/auditsc.c      | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8173dfd89cb2..e3cbebbabebd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1228,7 +1228,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
 		goto out_free_page;
 
 	}
-	length = audit_set_loginuid(current, loginuid);
+	length = audit_set_loginuid(loginuid);
 	if (likely(length == 0))
 		length = count;
 
diff --git a/include/linux/audit.h b/include/linux/audit.h
index e36aa37c88af..7cbd6fe41573 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -489,7 +489,7 @@ static inline void audit_ptrace(struct task_struct *t)
 extern unsigned int audit_serial(void);
 extern int auditsc_get_stamp(struct audit_context *ctx,
 			      struct timespec *t, unsigned int *serial);
-extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
+extern int  audit_set_loginuid(uid_t loginuid);
 #define audit_get_loginuid(t) ((t)->loginuid)
 #define audit_get_sessionid(t) ((t)->sessionid)
 extern void audit_log_task_context(struct audit_buffer *ab);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 87b375fb12ff..9d6dd7d869c0 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2163,16 +2163,16 @@ int auditsc_get_stamp(struct audit_context *ctx,
 static atomic_t session_id = ATOMIC_INIT(0);
 
 /**
- * audit_set_loginuid - set a task's audit_context loginuid
- * @task: task whose audit context is being modified
+ * audit_set_loginuid - set current task's audit_context loginuid
  * @loginuid: loginuid value
  *
  * Returns 0.
  *
  * Called (set) from fs/proc/base.c::proc_loginuid_write().
  */
-int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
+int audit_set_loginuid(uid_t loginuid)
 {
+	struct task_struct *task = current;
 	unsigned int sessionid = atomic_inc_return(&session_id);
 	struct audit_context *context = task->audit_context;
 
-- 
cgit v1.2.3


From 02d86a568c6d2d335256864451ac8ce781bc5652 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:08 -0500
Subject: audit: allow interfield comparison in audit rules

We wish to be able to audit when a uid=500 task accesses a file which is
uid=0.  Or vice versa.  This patch introduces a new audit filter type
AUDIT_FIELD_COMPARE which takes an 'enum' value indicating which fields
should be compared.  At this point we only define the task->uid vs
inode->uid comparison, but other comparisons can be added.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  4 ++++
 kernel/auditfilter.c  |  5 ++++-
 kernel/auditsc.c      | 30 +++++++++++++++++++++++++++++-
 3 files changed, 37 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 7cbd6fe41573..838e05fc0582 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -182,7 +182,10 @@
  * AUDIT_UNUSED_BITS is updated if need be. */
 #define AUDIT_UNUSED_BITS	0x07FFFC00
 
+/* AUDIT_FIELD_COMPARE rule list */
+#define AUDIT_COMPARE_UID_TO_OBJ_UID	1
 
+#define AUDIT_MAX_FIELD_COMPARE	AUDIT_COMPARE_UID_TO_OBJ_UID
 /* Rule fields */
 				/* These are useful when checking the
 				 * task structure at task creation time
@@ -225,6 +228,7 @@
 #define AUDIT_FILETYPE	108
 #define AUDIT_OBJ_UID	109
 #define AUDIT_OBJ_GID	110
+#define AUDIT_FIELD_COMPARE	111
 
 #define AUDIT_ARG0      200
 #define AUDIT_ARG1      (AUDIT_ARG0+1)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index f10605c787e6..a6c3f1abd206 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -526,7 +526,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 				goto exit_free;
 			break;
 		case AUDIT_FILTERKEY:
-			err = -EINVAL;
 			if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN)
 				goto exit_free;
 			str = audit_unpack_string(&bufp, &remain, f->val);
@@ -543,6 +542,10 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 			if (f->val & ~S_IFMT)
 				goto exit_free;
 			break;
+		case AUDIT_FIELD_COMPARE:
+			if (f->val > AUDIT_MAX_FIELD_COMPARE)
+				goto exit_free;
+			break;
 		default:
 			goto exit_free;
 		}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 9161e70a4379..8fb2c8e6d624 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -463,6 +463,32 @@ static int match_tree_refs(struct audit_context *ctx, struct audit_tree *tree)
 	return 0;
 }
 
+static int audit_field_compare(struct task_struct *tsk,
+			       const struct cred *cred,
+			       struct audit_field *f,
+			       struct audit_context *ctx,
+			       struct audit_names *name)
+{
+	struct audit_names *n;
+
+	switch (f->val) {
+	case AUDIT_COMPARE_UID_TO_OBJ_UID:
+		if (name) {
+			return audit_comparator(cred->uid, f->op, name->uid);
+		} else if (ctx) {
+			list_for_each_entry(n, &ctx->names_list, list) {
+				if (audit_comparator(cred->uid, f->op, n->uid))
+					return 1;
+			}
+		}
+		break;
+	default:
+		WARN(1, "Missing AUDIT_COMPARE define.  Report as a bug\n");
+		return 0;
+	}
+	return 0;
+}
+
 /* Determine if any context name data matches a rule's watch data */
 /* Compare a task_struct with an audit_rule.  Return 1 on match, 0
  * otherwise.
@@ -693,8 +719,10 @@ static int audit_filter_rules(struct task_struct *tsk,
 		case AUDIT_FILETYPE:
 			result = audit_match_filetype(ctx, f->val);
 			break;
+		case AUDIT_FIELD_COMPARE:
+			result = audit_field_compare(tsk, cred, f, ctx, name);
+			break;
 		}
-
 		if (!result)
 			return 0;
 	}
-- 
cgit v1.2.3


From c9fe685f7a17a0ee8bf3fbe51e40b1c8b8e65896 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:08 -0500
Subject: audit: allow interfield comparison between gid and ogid

Allow audit rules to compare the gid of the running task to the gid of the
inode in question.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 3 ++-
 kernel/auditsc.c      | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 838e05fc0582..fffbc2176ee1 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -184,8 +184,9 @@
 
 /* AUDIT_FIELD_COMPARE rule list */
 #define AUDIT_COMPARE_UID_TO_OBJ_UID	1
+#define AUDIT_COMPARE_GID_TO_OBJ_GID	2
 
-#define AUDIT_MAX_FIELD_COMPARE	AUDIT_COMPARE_UID_TO_OBJ_UID
+#define AUDIT_MAX_FIELD_COMPARE	AUDIT_COMPARE_GID_TO_OBJ_GID
 /* Rule fields */
 				/* These are useful when checking the
 				 * task structure at task creation time
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b12cc32fe377..861c7b9c565a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -474,6 +474,8 @@ static int audit_compare_id(uid_t uid1,
 	uid_t uid2;
 	int rc;
 
+	BUILD_BUG_ON(sizeof(uid_t) != sizeof(gid_t));
+
 	if (name) {
 		addr = (unsigned long)name;
 		addr += name_offset;
@@ -510,6 +512,10 @@ static int audit_field_compare(struct task_struct *tsk,
 		return audit_compare_id(cred->uid,
 					name, offsetof(struct audit_names, uid),
 					f, ctx);
+	case AUDIT_COMPARE_GID_TO_OBJ_GID:
+		return audit_compare_id(cred->gid,
+					name, offsetof(struct audit_names, gid),
+					f, ctx);
 	default:
 		WARN(1, "Missing AUDIT_COMPARE define.  Report as a bug\n");
 		return 0;
-- 
cgit v1.2.3


From 4a6633ed08af5ba67790b4d1adcdeb8ceb55677e Mon Sep 17 00:00:00 2001
From: Peter Moody <pmoody@google.com>
Date: Tue, 13 Dec 2011 16:17:51 -0800
Subject: audit: implement all object interfield comparisons

This completes the matrix of interfield comparisons between uid/gid
information for the current task and the uid/gid information for inodes.
For example, one can now audit based on differences between the euid of
the process and the uid of fs objects.

Signed-off-by: Peter Moody <pmoody@google.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 10 +++++++++-
 kernel/auditsc.c      | 29 +++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index fffbc2176ee1..67113cb4bc15 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -185,8 +185,16 @@
 /* AUDIT_FIELD_COMPARE rule list */
 #define AUDIT_COMPARE_UID_TO_OBJ_UID	1
 #define AUDIT_COMPARE_GID_TO_OBJ_GID	2
+#define AUDIT_COMPARE_EUID_TO_OBJ_UID	3
+#define AUDIT_COMPARE_EGID_TO_OBJ_GID	4
+#define AUDIT_COMPARE_AUID_TO_OBJ_UID	5
+#define AUDIT_COMPARE_SUID_TO_OBJ_UID	6
+#define AUDIT_COMPARE_SGID_TO_OBJ_GID	7
+#define AUDIT_COMPARE_FSUID_TO_OBJ_UID	8
+#define AUDIT_COMPARE_FSGID_TO_OBJ_GID	9
+
+#define AUDIT_MAX_FIELD_COMPARE		AUDIT_COMPARE_FSGID_TO_OBJ_GID
 
-#define AUDIT_MAX_FIELD_COMPARE	AUDIT_COMPARE_GID_TO_OBJ_GID
 /* Rule fields */
 				/* These are useful when checking the
 				 * task structure at task creation time
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 861c7b9c565a..b8cee462b99e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -508,6 +508,7 @@ static int audit_field_compare(struct task_struct *tsk,
 			       struct audit_names *name)
 {
 	switch (f->val) {
+	/* process to file object comparisons */
 	case AUDIT_COMPARE_UID_TO_OBJ_UID:
 		return audit_compare_id(cred->uid,
 					name, offsetof(struct audit_names, uid),
@@ -516,6 +517,34 @@ static int audit_field_compare(struct task_struct *tsk,
 		return audit_compare_id(cred->gid,
 					name, offsetof(struct audit_names, gid),
 					f, ctx);
+	case AUDIT_COMPARE_EUID_TO_OBJ_UID:
+		return audit_compare_id(cred->euid,
+					name, offsetof(struct audit_names, uid),
+					f, ctx);
+	case AUDIT_COMPARE_EGID_TO_OBJ_GID:
+		return audit_compare_id(cred->egid,
+					name, offsetof(struct audit_names, gid),
+					f, ctx);
+	case AUDIT_COMPARE_AUID_TO_OBJ_UID:
+		return audit_compare_id(tsk->loginuid,
+					name, offsetof(struct audit_names, uid),
+					f, ctx);
+	case AUDIT_COMPARE_SUID_TO_OBJ_UID:
+		return audit_compare_id(cred->suid,
+					name, offsetof(struct audit_names, uid),
+					f, ctx);
+	case AUDIT_COMPARE_SGID_TO_OBJ_GID:
+		return audit_compare_id(cred->sgid,
+					name, offsetof(struct audit_names, gid),
+					f, ctx);
+	case AUDIT_COMPARE_FSUID_TO_OBJ_UID:
+		return audit_compare_id(cred->fsuid,
+					name, offsetof(struct audit_names, uid),
+					f, ctx);
+	case AUDIT_COMPARE_FSGID_TO_OBJ_GID:
+		return audit_compare_id(cred->fsgid,
+					name, offsetof(struct audit_names, gid),
+					f, ctx);
 	default:
 		WARN(1, "Missing AUDIT_COMPARE define.  Report as a bug\n");
 		return 0;
-- 
cgit v1.2.3


From 10d68360871657204885371cdf2594412675d2f9 Mon Sep 17 00:00:00 2001
From: Peter Moody <pmoody@google.com>
Date: Wed, 4 Jan 2012 15:24:31 -0500
Subject: audit: comparison on interprocess fields

This allows audit to specify rules in which we compare two fields of a
process, for example: is the running process's uid != the running
process's euid?

Signed-off-by: Peter Moody <pmoody@google.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 24 +++++++++++++++++++++++-
 kernel/auditsc.c      | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 67113cb4bc15..9ff7a2c48b50 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -193,7 +193,29 @@
 #define AUDIT_COMPARE_FSUID_TO_OBJ_UID	8
 #define AUDIT_COMPARE_FSGID_TO_OBJ_GID	9
 
-#define AUDIT_MAX_FIELD_COMPARE		AUDIT_COMPARE_FSGID_TO_OBJ_GID
+#define AUDIT_COMPARE_UID_TO_AUID	10
+#define AUDIT_COMPARE_UID_TO_EUID	11
+#define AUDIT_COMPARE_UID_TO_FSUID	12
+#define AUDIT_COMPARE_UID_TO_SUID	13
+
+#define AUDIT_COMPARE_AUID_TO_FSUID	14
+#define AUDIT_COMPARE_AUID_TO_SUID	15
+#define AUDIT_COMPARE_AUID_TO_EUID	16
+
+#define AUDIT_COMPARE_EUID_TO_SUID	17
+#define AUDIT_COMPARE_EUID_TO_FSUID	18
+
+#define AUDIT_COMPARE_SUID_TO_FSUID	19
+
+#define AUDIT_COMPARE_GID_TO_EGID	20
+#define AUDIT_COMPARE_GID_TO_FSGID	21
+#define AUDIT_COMPARE_GID_TO_SGID	22
+
+#define AUDIT_COMPARE_EGID_TO_FSGID	23
+#define AUDIT_COMPARE_EGID_TO_SGID	24
+#define AUDIT_COMPARE_SGID_TO_FSGID	25
+
+#define AUDIT_MAX_FIELD_COMPARE		AUDIT_COMPARE_SGID_TO_FSGID
 
 /* Rule fields */
 				/* These are useful when checking the
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b8cee462b99e..593237e3654d 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -545,6 +545,45 @@ static int audit_field_compare(struct task_struct *tsk,
 		return audit_compare_id(cred->fsgid,
 					name, offsetof(struct audit_names, gid),
 					f, ctx);
+	/* uid comparisons */
+	case AUDIT_COMPARE_UID_TO_AUID:
+		return audit_comparator(cred->uid, f->op, tsk->loginuid);
+	case AUDIT_COMPARE_UID_TO_EUID:
+		return audit_comparator(cred->uid, f->op, cred->euid);
+	case AUDIT_COMPARE_UID_TO_SUID:
+		return audit_comparator(cred->uid, f->op, cred->suid);
+	case AUDIT_COMPARE_UID_TO_FSUID:
+		return audit_comparator(cred->uid, f->op, cred->fsuid);
+	/* auid comparisons */
+	case AUDIT_COMPARE_AUID_TO_EUID:
+		return audit_comparator(tsk->loginuid, f->op, cred->euid);
+	case AUDIT_COMPARE_AUID_TO_SUID:
+		return audit_comparator(tsk->loginuid, f->op, cred->suid);
+	case AUDIT_COMPARE_AUID_TO_FSUID:
+		return audit_comparator(tsk->loginuid, f->op, cred->fsuid);
+	/* euid comparisons */
+	case AUDIT_COMPARE_EUID_TO_SUID:
+		return audit_comparator(cred->euid, f->op, cred->suid);
+	case AUDIT_COMPARE_EUID_TO_FSUID:
+		return audit_comparator(cred->euid, f->op, cred->fsuid);
+	/* suid comparisons */
+	case AUDIT_COMPARE_SUID_TO_FSUID:
+		return audit_comparator(cred->suid, f->op, cred->fsuid);
+	/* gid comparisons */
+	case AUDIT_COMPARE_GID_TO_EGID:
+		return audit_comparator(cred->gid, f->op, cred->egid);
+	case AUDIT_COMPARE_GID_TO_SGID:
+		return audit_comparator(cred->gid, f->op, cred->sgid);
+	case AUDIT_COMPARE_GID_TO_FSGID:
+		return audit_comparator(cred->gid, f->op, cred->fsgid);
+	/* egid comparisons */
+	case AUDIT_COMPARE_EGID_TO_SGID:
+		return audit_comparator(cred->egid, f->op, cred->sgid);
+	case AUDIT_COMPARE_EGID_TO_FSGID:
+		return audit_comparator(cred->egid, f->op, cred->fsgid);
+	/* sgid comparison */
+	case AUDIT_COMPARE_SGID_TO_FSGID:
+		return audit_comparator(cred->sgid, f->op, cred->fsgid);
 	default:
 		WARN(1, "Missing AUDIT_COMPARE define.  Report as a bug\n");
 		return 0;
-- 
cgit v1.2.3


From 67175b855bfd6ed95ffeff95532173c07de6432d Mon Sep 17 00:00:00 2001
From: James Bottomley <jbottomley@parallels.com>
Date: Tue, 17 Jan 2012 21:14:05 +0000
Subject: Fix compile breakage with kref.h

This set of build failures just started appearing on parisc:

  In file included from drivers/input/serio/serio_raw.c:12:
  include/linux/kref.h: In function 'kref_get':
  include/linux/kref.h:40: error: 'TAINT_WARN' undeclared (first use in this function)
  include/linux/kref.h:40: error: (Each undeclared identifier is reported only once
  include/linux/kref.h:40: error: for each function it appears in.)
  include/linux/kref.h: In function 'kref_sub':
  include/linux/kref.h:65: error: 'TAINT_WARN' undeclared (first use in this function)

It happens because TAINT_WARN is defined in kernel.h and this particular
compile doesn't seem to include it (no idea why it's only manifesting now;
probably some #include file untangling exposed it).

Fix by adding

  #include <linux/kernel.h>

to linux/kref.h

Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kref.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index abc0120b09b7..9c07dcebded7 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -17,6 +17,7 @@
 
 #include <linux/bug.h>
 #include <linux/atomic.h>
+#include <linux/kernel.h>
 
 struct kref {
 	atomic_t refcount;
-- 
cgit v1.2.3


From ee0b31a25a010116f44fca6c96f4516d417793dd Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 17 Jan 2012 20:39:51 +0000
Subject: keys: fix trusted/encrypted keys sparse rcu_assign_pointer messages

Define rcu_assign_keypointer(), which uses the key payload.rcudata instead
of payload.data, to resolve the CONFIG_SPARSE_RCU_POINTER message:
"incompatible types in comparison expression (different address spaces)"

Replace the rcu_assign_pointer() calls in encrypted/trusted keys with
rcu_assign_keypointer().

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/key.h                              | 3 +++
 security/keys/encrypted-keys/encrypted.c         | 4 ++--
 security/keys/encrypted-keys/masterkey_trusted.c | 2 ++
 security/keys/trusted.c                          | 4 ++--
 4 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index 183a6af7715d..bfc014c57351 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -293,6 +293,9 @@ static inline bool key_is_instantiated(const struct key *key)
 	(rcu_dereference_protected((KEY)->payload.rcudata,		\
 				   rwsem_is_locked(&((struct key *)(KEY))->sem)))
 
+#define rcu_assign_keypointer(KEY, PAYLOAD)				\
+	(rcu_assign_pointer((KEY)->payload.rcudata, PAYLOAD))
+
 #ifdef CONFIG_SYSCTL
 extern ctl_table key_sysctls[];
 #endif
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 41144f71d615..d91efb6901e9 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -810,7 +810,7 @@ static int encrypted_instantiate(struct key *key, const void *data,
 		goto out;
 	}
 
-	rcu_assign_pointer(key->payload.data, epayload);
+	rcu_assign_keypointer(key, epayload);
 out:
 	kfree(datablob);
 	return ret;
@@ -874,7 +874,7 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen)
 	memcpy(new_epayload->payload_data, epayload->payload_data,
 	       epayload->payload_datalen);
 
-	rcu_assign_pointer(key->payload.data, new_epayload);
+	rcu_assign_keypointer(key, new_epayload);
 	call_rcu(&epayload->rcu, encrypted_rcu_free);
 out:
 	kfree(buf);
diff --git a/security/keys/encrypted-keys/masterkey_trusted.c b/security/keys/encrypted-keys/masterkey_trusted.c
index df87272e3f51..8c16c3e472e7 100644
--- a/security/keys/encrypted-keys/masterkey_trusted.c
+++ b/security/keys/encrypted-keys/masterkey_trusted.c
@@ -18,6 +18,8 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <keys/trusted-type.h>
+#include <keys/encrypted-type.h>
+#include "encrypted.h"
 
 /*
  * request_trusted_key - request the trusted key
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 0ed5fdf238a2..2d5d041f2049 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -993,7 +993,7 @@ out:
 	kfree(datablob);
 	kfree(options);
 	if (!ret)
-		rcu_assign_pointer(key->payload.data, payload);
+		rcu_assign_keypointer(key, payload);
 	else
 		kfree(payload);
 	return ret;
@@ -1067,7 +1067,7 @@ static int trusted_update(struct key *key, const void *data, size_t datalen)
 			goto out;
 		}
 	}
-	rcu_assign_pointer(key->payload.data, new_p);
+	rcu_assign_keypointer(key, new_p);
 	call_rcu(&p->rcu, trusted_rcu_free);
 out:
 	kfree(datablob);
-- 
cgit v1.2.3


From 5e8898e97a5db4125d944070922164d1d09a2689 Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Date: Tue, 17 Jan 2012 17:12:03 +0200
Subject: lib: digital signature config option name change

It was reported that DIGSIG is a confusing name for the digital signature
module. It was suggested to rename DIGSIG to SIGNATURE.

Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
Suggested-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/digsig.h     | 4 ++--
 lib/Kconfig                | 2 +-
 lib/Makefile               | 2 +-
 security/integrity/Kconfig | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/digsig.h b/include/linux/digsig.h
index efae755017d7..b01558b15814 100644
--- a/include/linux/digsig.h
+++ b/include/linux/digsig.h
@@ -46,7 +46,7 @@ struct signature_hdr {
 	char		mpi[0];
 } __packed;
 
-#if defined(CONFIG_DIGSIG) || defined(CONFIG_DIGSIG_MODULE)
+#if defined(CONFIG_SIGNATURE) || defined(CONFIG_SIGNATURE_MODULE)
 
 int digsig_verify(struct key *keyring, const char *sig, int siglen,
 					const char *digest, int digestlen);
@@ -59,6 +59,6 @@ static inline int digsig_verify(struct key *keyring, const char *sig,
 	return -EOPNOTSUPP;
 }
 
-#endif /* CONFIG_DIGSIG */
+#endif /* CONFIG_SIGNATURE */
 
 #endif /* _DIGSIG_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 201e1b33d721..854735d96dc3 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -302,7 +302,7 @@ config MPILIB_EXTRA
 	  This code in unnecessary for RSA digital signature verification,
 	  and can be compiled if needed.
 
-config DIGSIG
+config SIGNATURE
 	tristate "In-kernel signature checker"
 	depends on KEYS
 	select MPILIB
diff --git a/lib/Makefile b/lib/Makefile
index dace162c7e1c..d71aae1b01b3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -119,7 +119,7 @@ obj-$(CONFIG_CORDIC) += cordic.o
 obj-$(CONFIG_DQL) += dynamic_queue_limits.o
 
 obj-$(CONFIG_MPILIB) += mpi/
-obj-$(CONFIG_DIGSIG) += digsig.o
+obj-$(CONFIG_SIGNATURE) += digsig.o
 
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig
index d384ea921482..ff60bf72881f 100644
--- a/security/integrity/Kconfig
+++ b/security/integrity/Kconfig
@@ -7,7 +7,7 @@ config INTEGRITY_DIGSIG
 	boolean "Digital signature verification using multiple keyrings"
 	depends on INTEGRITY && KEYS
 	default n
-	select DIGSIG
+	select SIGNATURE
 	help
 	  This option enables digital signature verification support
 	  using multiple keyrings. It defines separate keyrings for each
-- 
cgit v1.2.3


From 65b7f839ceecc0a36c7969c0c9151d5748cd4242 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Tue, 17 Jan 2012 22:40:08 +0100
Subject: intel_idle: Split up and provide per CPU initialization func

Function split up, should have no functional change.

Provides entry point for physically hotplugged CPUs
to initialize and activate cpuidle.

Signed-off-by: Thomas Renninger <trenn@suse.de>
CC: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
CC: Shaohua Li <shaohua.li@intel.com>
CC: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/idle/intel_idle.c | 82 ++++++++++++++++++++++++-----------------------
 include/linux/cpuidle.h   |  7 ++++
 2 files changed, 49 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 5d2f8e13cf0e..ef0c04d8dc22 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -478,64 +478,60 @@ static int intel_idle_cpuidle_driver_init(void)
 
 
 /*
- * intel_idle_cpuidle_devices_init()
+ * intel_idle_cpu_init()
  * allocate, initialize, register cpuidle_devices
+ * @cpu: cpu/core to initialize
  */
-static int intel_idle_cpuidle_devices_init(void)
+int intel_idle_cpu_init(int cpu)
 {
-	int i, cstate;
+	int cstate;
 	struct cpuidle_device *dev;
 
-	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
-	if (intel_idle_cpuidle_devices == NULL)
-		return -ENOMEM;
-
-	for_each_online_cpu(i) {
-		dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
+	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
 
-		dev->state_count = 1;
+	dev->state_count = 1;
 
-		for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
-			int num_substates;
+	for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
+		int num_substates;
 
-			if (cstate > max_cstate) {
-				printk(PREFIX "max_cstate %d reached\n",
-					max_cstate);
-				break;
-			}
+		if (cstate > max_cstate) {
+			printk(PREFIX "max_cstate %d reached\n",
+			       max_cstate);
+			break;
+		}
 
-			/* does the state exist in CPUID.MWAIT? */
-			num_substates = (mwait_substates >> ((cstate) * 4))
-						& MWAIT_SUBSTATE_MASK;
-			if (num_substates == 0)
-				continue;
-			/* is the state not enabled? */
-			if (cpuidle_state_table[cstate].enter == NULL) {
-				continue;
-			}
+		/* does the state exist in CPUID.MWAIT? */
+		num_substates = (mwait_substates >> ((cstate) * 4))
+			& MWAIT_SUBSTATE_MASK;
+		if (num_substates == 0)
+			continue;
+		/* is the state not enabled? */
+		if (cpuidle_state_table[cstate].enter == NULL)
+			continue;
 
-			dev->states_usage[dev->state_count].driver_data =
-				(void *)get_driver_data(cstate);
+		dev->states_usage[dev->state_count].driver_data =
+			(void *)get_driver_data(cstate);
 
 			dev->state_count += 1;
 		}
+	dev->cpu = cpu;
 
-		dev->cpu = i;
-		if (cpuidle_register_device(dev)) {
-			pr_debug(PREFIX "cpuidle_register_device %d failed!\n",
-				 i);
-			intel_idle_cpuidle_devices_uninit();
-			return -EIO;
-		}
+	if (cpuidle_register_device(dev)) {
+		pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu);
+		intel_idle_cpuidle_devices_uninit();
+		return -EIO;
 	}
 
+	if (auto_demotion_disable_flags)
+		smp_call_function_single(cpu, auto_demotion_disable, NULL, 1);
+
 	return 0;
 }
 
 
 static int __init intel_idle_init(void)
 {
-	int retval;
+	int retval, i;
 
 	/* Do not load intel_idle at all for now if idle= is passed */
 	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
@@ -553,10 +549,16 @@ static int __init intel_idle_init(void)
 		return retval;
 	}
 
-	retval = intel_idle_cpuidle_devices_init();
-	if (retval) {
-		cpuidle_unregister_driver(&intel_idle_driver);
-		return retval;
+	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
+	if (intel_idle_cpuidle_devices == NULL)
+		return -ENOMEM;
+
+	for_each_online_cpu(i) {
+		retval = intel_idle_cpu_init(i);
+		if (retval) {
+			cpuidle_unregister_driver(&intel_idle_driver);
+			return retval;
+		}
 	}
 
 	return 0;
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 7408af843b8a..93df66ea794a 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -188,7 +188,14 @@ struct cpuidle_governor {
 extern int cpuidle_register_governor(struct cpuidle_governor *gov);
 extern void cpuidle_unregister_governor(struct cpuidle_governor *gov);
 
+#ifdef CONFIG_INTEL_IDLE
+extern int intel_idle_cpu_init(int cpu);
 #else
+static inline int intel_idle_cpu_init(int cpu) { return -1; }
+#endif
+
+#else
+static inline int intel_idle_cpu_init(int cpu) { return -1; }
 
 static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
 {return 0;}
-- 
cgit v1.2.3


From 456a8167e94b66f406c27400a46a707b870452b0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 18 Jan 2012 10:04:29 +0000
Subject: KEYS: Permit key_serial() to be called with a const key pointer

Permit key_serial() to be called with a const key pointer.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/key.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index bfc014c57351..5253471cd2ea 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -271,7 +271,7 @@ extern int keyring_add_key(struct key *keyring,
 
 extern struct key *key_lookup(key_serial_t id);
 
-static inline key_serial_t key_serial(struct key *key)
+static inline key_serial_t key_serial(const struct key *key)
 {
 	return key ? key->serial : 0;
 }
-- 
cgit v1.2.3


From 72081624d5ad3cf56deb6e727b78c4e7a55e4eec Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 19 Jan 2012 23:25:33 +0100
Subject: PM / Hibernate: Rewrite unlock_system_sleep() to fix s2disk
 regression

Commit 33e638b, "PM / Sleep: Use the freezer_count() functions in
[un]lock_system_sleep() APIs" introduced an undesirable change in the
behaviour of unlock_system_sleep() since freezer_count() internally calls
try_to_freeze() - which we don't need in unlock_system_sleep().

And commit bcda53f, "PM / Sleep: Replace mutex_[un]lock(&pm_mutex) with
[un]lock_system_sleep()" made these APIs wide-spread. This caused a
regression in suspend-to-disk where snapshot_read() and snapshot_write()
were getting frozen due to the try_to_freeze embedded in
unlock_system_sleep(), since these functions were invoked when the freezing
condition was still in effect.

Fix this by rewriting unlock_system_sleep() by open-coding freezer_count()
and dropping the try_to_freeze() part. Not only will this fix the
regression but this will also ensure that the API only does what it is
intended to do, and nothing more, under the hood.

While at it, make the code more correct and robust by ensuring that the
PF_FREEZER_SKIP flag gets cleared with pm_mutex held, to avoid a race with
the freezer.

Also, to be on the safer side, open-code freezer_do_not_count() as well
(inside lock_system_sleep()), to ensure that any unrelated modification to
freezer[_do_not]_count() does not break things again!

Reported-and-tested-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/suspend.h | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 95040cc33107..91784a4f8608 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -357,14 +357,29 @@ extern bool pm_save_wakeup_count(unsigned int count);
 
 static inline void lock_system_sleep(void)
 {
-	freezer_do_not_count();
+	current->flags |= PF_FREEZER_SKIP;
 	mutex_lock(&pm_mutex);
 }
 
 static inline void unlock_system_sleep(void)
 {
+	/*
+	 * Don't use freezer_count() because we don't want the call to
+	 * try_to_freeze() here.
+	 *
+	 * Reason:
+	 * Fundamentally, we just don't need it, because freezing condition
+	 * doesn't come into effect until we release the pm_mutex lock,
+	 * since the freezer always works with pm_mutex held.
+	 *
+	 * More importantly, in the case of hibernation,
+	 * unlock_system_sleep() gets called in snapshot_read() and
+	 * snapshot_write() when the freezing condition is still in effect.
+	 * Which means, if we use try_to_freeze() here, it would make them
+	 * enter the refrigerator, thus causing hibernation to lockup.
+	 */
+	current->flags &= ~PF_FREEZER_SKIP;
 	mutex_unlock(&pm_mutex);
-	freezer_count();
 }
 
 #else /* !CONFIG_PM_SLEEP */
-- 
cgit v1.2.3


From 65f2e753f1eb09d3a7e2a0d16408a5433b4097b2 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 20 Jan 2012 17:38:58 +0000
Subject: Revert "ARM: sa11x0: Implement autoloading of codec and codec pdata
 for mcp bus."

This reverts commit 5dd7bf59e0e8563265b3e5b33276099ef628fcc7.

Conflicts:

	scripts/mod/file2alias.c

This change is wrong on many levels.  First and foremost, it causes a
regression.  On boot on Assabet, which this patch gives a codec id of
'ucb1x00', it gives:

	ucb1x00 ID not found: 1005

0x1005 is a valid ID for the UCB1300 device.

Secondly, this patch is way over the top in terms of complexity.  The
only device which has been seen to be connected with this MCP code is
the UCB1x00 (UCB1200, UCB1300 etc) devices, and they all use the same
driver.  Adding a match table, requiring the codec string to match the
hardware ID read out of the ID register, etc is completely over the top
when we can just read the hardware ID register.
---
 arch/arm/mach-sa1100/assabet.c          |  1 -
 arch/arm/mach-sa1100/cerf.c             |  1 -
 arch/arm/mach-sa1100/collie.c           |  8 +-----
 arch/arm/mach-sa1100/include/mach/mcp.h |  2 --
 arch/arm/mach-sa1100/lart.c             |  1 -
 arch/arm/mach-sa1100/shannon.c          |  1 -
 arch/arm/mach-sa1100/simpad.c           |  8 +-----
 drivers/mfd/mcp-core.c                  | 44 ++----------------------------
 drivers/mfd/mcp-sa11x0.c                |  7 ++---
 drivers/mfd/ucb1x00-core.c              | 48 ++++++++-------------------------
 drivers/mfd/ucb1x00-ts.c                |  2 +-
 include/linux/mfd/mcp.h                 |  7 ++---
 include/linux/mfd/ucb1x00.h             |  5 +---
 include/linux/mod_devicetable.h         | 11 --------
 scripts/mod/file2alias.c                | 10 -------
 15 files changed, 21 insertions(+), 135 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index d8aa1c28353b..0c4b76ab4d8e 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -202,7 +202,6 @@ static struct irda_platform_data assabet_irda_data = {
 static struct mcp_plat_data assabet_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1x00",
 };
 
 static void __init assabet_init(void)
diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c
index fcadc4cafe9a..11bb6d0b9be3 100644
--- a/arch/arm/mach-sa1100/cerf.c
+++ b/arch/arm/mach-sa1100/cerf.c
@@ -124,7 +124,6 @@ static void __init cerf_map_io(void)
 static struct mcp_plat_data cerf_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1x00",
 };
 
 static void __init cerf_init(void)
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 6b7c74b304cf..b9060e236def 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -27,7 +27,6 @@
 #include <linux/timer.h>
 #include <linux/gpio.h>
 #include <linux/pda_power.h>
-#include <linux/mfd/ucb1x00.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -86,15 +85,10 @@ static struct scoop_pcmcia_config collie_pcmcia_config = {
 	.num_devs	= 1,
 };
 
-static struct ucb1x00_plat_data collie_ucb1x00_data = {
-	.gpio_base	= COLLIE_TC35143_GPIO_BASE,
-};
-
 static struct mcp_plat_data collie_mcp_data = {
 	.mccr0		= MCCR0_ADM | MCCR0_ExtClk,
 	.sclk_rate	= 9216000,
-	.codec		= "ucb1x00",
-	.codec_pdata	= &collie_ucb1x00_data,
+	.gpio_base	= COLLIE_TC35143_GPIO_BASE,
 };
 
 /*
diff --git a/arch/arm/mach-sa1100/include/mach/mcp.h b/arch/arm/mach-sa1100/include/mach/mcp.h
index 586cec898b35..ed1a331508a7 100644
--- a/arch/arm/mach-sa1100/include/mach/mcp.h
+++ b/arch/arm/mach-sa1100/include/mach/mcp.h
@@ -17,8 +17,6 @@ struct mcp_plat_data {
 	u32 mccr1;
 	unsigned int sclk_rate;
 	int gpio_base;
-	const char *codec;
-	void *codec_pdata;
 };
 
 #endif
diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c
index 48a8f4ef0fcd..af4e2761f3db 100644
--- a/arch/arm/mach-sa1100/lart.c
+++ b/arch/arm/mach-sa1100/lart.c
@@ -24,7 +24,6 @@
 static struct mcp_plat_data lart_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1x00",
 };
 
 static void __init lart_init(void)
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index 3807c9135272..318b2b766a0b 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -55,7 +55,6 @@ static struct resource shannon_flash_resource = {
 static struct mcp_plat_data shannon_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1x00",
 };
 
 static void __init shannon_init(void)
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index d9b765c441f6..e17c04d6e324 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -14,7 +14,6 @@
 #include <linux/mtd/partitions.h>
 #include <linux/io.h>
 #include <linux/gpio.h>
-#include <linux/mfd/ucb1x00.h>
 
 #include <asm/irq.h>
 #include <mach/hardware.h>
@@ -188,15 +187,10 @@ static struct resource simpad_flash_resources [] = {
 	}
 };
 
-static struct ucb1x00_plat_data simpad_ucb1x00_data = {
-	.gpio_base	= SIMPAD_UCB1X00_GPIO_BASE,
-};
-
 static struct mcp_plat_data simpad_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1300",
-	.codec_pdata	= &simpad_ucb1x00_data,
+	.gpio_base	= SIMPAD_UCB1X00_GPIO_BASE,
 };
 
 
diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c
index 63be60bc3455..84815f9ef636 100644
--- a/drivers/mfd/mcp-core.c
+++ b/drivers/mfd/mcp-core.c
@@ -26,35 +26,9 @@
 #define to_mcp(d)		container_of(d, struct mcp, attached_device)
 #define to_mcp_driver(d)	container_of(d, struct mcp_driver, drv)
 
-static const struct mcp_device_id *mcp_match_id(const struct mcp_device_id *id,
-						const char *codec)
-{
-	while (id->name[0]) {
-		if (strcmp(codec, id->name) == 0)
-			return id;
-		id++;
-	}
-	return NULL;
-}
-
-const struct mcp_device_id *mcp_get_device_id(const struct mcp *mcp)
-{
-	const struct mcp_driver *driver =
-		to_mcp_driver(mcp->attached_device.driver);
-
-	return mcp_match_id(driver->id_table, mcp->codec);
-}
-EXPORT_SYMBOL(mcp_get_device_id);
-
 static int mcp_bus_match(struct device *dev, struct device_driver *drv)
 {
-	const struct mcp *mcp = to_mcp(dev);
-	const struct mcp_driver *driver = to_mcp_driver(drv);
-
-	if (driver->id_table)
-		return !!mcp_match_id(driver->id_table, mcp->codec);
-
-	return 0;
+	return 1;
 }
 
 static int mcp_bus_probe(struct device *dev)
@@ -100,18 +74,9 @@ static int mcp_bus_resume(struct device *dev)
 	return ret;
 }
 
-static int mcp_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-	struct mcp *mcp = to_mcp(dev);
-
-	add_uevent_var(env, "MODALIAS=%s%s", MCP_MODULE_PREFIX, mcp->codec);
-	return 0;
-}
-
 static struct bus_type mcp_bus_type = {
 	.name		= "mcp",
 	.match		= mcp_bus_match,
-	.uevent		= mcp_bus_uevent,
 	.probe		= mcp_bus_probe,
 	.remove		= mcp_bus_remove,
 	.suspend	= mcp_bus_suspend,
@@ -247,14 +212,9 @@ struct mcp *mcp_host_alloc(struct device *parent, size_t size)
 }
 EXPORT_SYMBOL(mcp_host_alloc);
 
-int mcp_host_register(struct mcp *mcp, void *pdata)
+int mcp_host_register(struct mcp *mcp)
 {
-	if (!mcp->codec)
-		return -EINVAL;
-
-	mcp->attached_device.platform_data = pdata;
 	dev_set_name(&mcp->attached_device, "mcp0");
-	request_module("%s%s", MCP_MODULE_PREFIX, mcp->codec);
 	return device_register(&mcp->attached_device);
 }
 EXPORT_SYMBOL(mcp_host_register);
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index da4e077a1bee..02c53a0766c4 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -146,9 +146,6 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	if (!data)
 		return -ENODEV;
 
-	if (!data->codec)
-		return -ENODEV;
-
 	if (!request_mem_region(0x80060000, 0x60, "sa11x0-mcp"))
 		return -EBUSY;
 
@@ -165,7 +162,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	mcp->dma_audio_wr	= DMA_Ser4MCP0Wr;
 	mcp->dma_telco_rd	= DMA_Ser4MCP1Rd;
 	mcp->dma_telco_wr	= DMA_Ser4MCP1Wr;
-	mcp->codec		= data->codec;
+	mcp->gpio_base		= data->gpio_base;
 
 	platform_set_drvdata(pdev, mcp);
 
@@ -198,7 +195,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	mcp->rw_timeout = (64 * 3 * 1000000 + mcp->sclk_rate - 1) /
 			  mcp->sclk_rate;
 
-	ret = mcp_host_register(mcp, data->codec_pdata);
+	ret = mcp_host_register(mcp);
 	if (ret == 0)
 		goto out;
 
diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c
index 91c4f25e0e55..b281217334eb 100644
--- a/drivers/mfd/ucb1x00-core.c
+++ b/drivers/mfd/ucb1x00-core.c
@@ -36,15 +36,6 @@ static DEFINE_MUTEX(ucb1x00_mutex);
 static LIST_HEAD(ucb1x00_drivers);
 static LIST_HEAD(ucb1x00_devices);
 
-static struct mcp_device_id ucb1x00_id[] = {
-	{ "ucb1x00", 0 },  /* auto-detection */
-	{ "ucb1200", UCB_ID_1200 },
-	{ "ucb1300", UCB_ID_1300 },
-	{ "tc35143", UCB_ID_TC35143 },
-	{ }
-};
-MODULE_DEVICE_TABLE(mcp, ucb1x00_id);
-
 /**
  *	ucb1x00_io_set_dir - set IO direction
  *	@ucb: UCB1x00 structure describing chip
@@ -536,33 +527,17 @@ static struct class ucb1x00_class = {
 
 static int ucb1x00_probe(struct mcp *mcp)
 {
-	const struct mcp_device_id *mid;
 	struct ucb1x00 *ucb;
 	struct ucb1x00_driver *drv;
-	struct ucb1x00_plat_data *pdata;
 	unsigned int id;
 	int ret = -ENODEV;
 	int temp;
 
 	mcp_enable(mcp);
 	id = mcp_reg_read(mcp, UCB_ID);
-	mid = mcp_get_device_id(mcp);
 
-	if (mid && mid->driver_data) {
-		if (id != mid->driver_data) {
-			printk(KERN_WARNING "%s wrong ID %04x found: %04x\n",
-				mid->name, (unsigned int) mid->driver_data, id);
-			goto err_disable;
-		}
-	} else {
-		mid = &ucb1x00_id[1];
-		while (mid->driver_data) {
-			if (id == mid->driver_data)
-				break;
-			mid++;
-		}
-		printk(KERN_WARNING "%s ID not found: %04x\n",
-			ucb1x00_id[0].name, id);
+	if (id != UCB_ID_1200 && id != UCB_ID_1300 && id != UCB_ID_TC35143) {
+		printk(KERN_WARNING "UCB1x00 ID not found: %04x\n", id);
 		goto err_disable;
 	}
 
@@ -571,28 +546,28 @@ static int ucb1x00_probe(struct mcp *mcp)
 	if (!ucb)
 		goto err_disable;
 
-	pdata = mcp->attached_device.platform_data;
+
 	ucb->dev.class = &ucb1x00_class;
 	ucb->dev.parent = &mcp->attached_device;
-	dev_set_name(&ucb->dev, mid->name);
+	dev_set_name(&ucb->dev, "ucb1x00");
 
 	spin_lock_init(&ucb->lock);
 	spin_lock_init(&ucb->io_lock);
 	sema_init(&ucb->adc_sem, 1);
 
-	ucb->id  = mid;
+	ucb->id  = id;
 	ucb->mcp = mcp;
 	ucb->irq = ucb1x00_detect_irq(ucb);
 	if (ucb->irq == NO_IRQ) {
-		printk(KERN_ERR "%s: IRQ probe failed\n", mid->name);
+		printk(KERN_ERR "UCB1x00: IRQ probe failed\n");
 		ret = -ENODEV;
 		goto err_free;
 	}
 
 	ucb->gpio.base = -1;
-	if (pdata && (pdata->gpio_base >= 0)) {
+	if (mcp->gpio_base != 0) {
 		ucb->gpio.label = dev_name(&ucb->dev);
-		ucb->gpio.base = pdata->gpio_base;
+		ucb->gpio.base = mcp->gpio_base;
 		ucb->gpio.ngpio = 10;
 		ucb->gpio.set = ucb1x00_gpio_set;
 		ucb->gpio.get = ucb1x00_gpio_get;
@@ -605,10 +580,10 @@ static int ucb1x00_probe(struct mcp *mcp)
 		dev_info(&ucb->dev, "gpio_base not set so no gpiolib support");
 
 	ret = request_irq(ucb->irq, ucb1x00_irq, IRQF_TRIGGER_RISING,
-			  mid->name, ucb);
+			  "UCB1x00", ucb);
 	if (ret) {
-		printk(KERN_ERR "%s: unable to grab irq%d: %d\n",
-			mid->name, ucb->irq, ret);
+		printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n",
+			ucb->irq, ret);
 		goto err_gpio;
 	}
 
@@ -730,7 +705,6 @@ static struct mcp_driver ucb1x00_driver = {
 	.remove		= ucb1x00_remove,
 	.suspend	= ucb1x00_suspend,
 	.resume		= ucb1x00_resume,
-	.id_table	= ucb1x00_id,
 };
 
 static int __init ucb1x00_init(void)
diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index 40ec3c118868..38ffbd50a0d2 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c
@@ -382,7 +382,7 @@ static int ucb1x00_ts_add(struct ucb1x00_dev *dev)
 	ts->adcsync = adcsync ? UCB_SYNC : UCB_NOSYNC;
 
 	idev->name       = "Touchscreen panel";
-	idev->id.product = ts->ucb->id->driver_data;
+	idev->id.product = ts->ucb->id;
 	idev->open       = ucb1x00_ts_open;
 	idev->close      = ucb1x00_ts_close;
 
diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h
index 1515e64e3663..ee496708e38b 100644
--- a/include/linux/mfd/mcp.h
+++ b/include/linux/mfd/mcp.h
@@ -10,7 +10,6 @@
 #ifndef MCP_H
 #define MCP_H
 
-#include <linux/mod_devicetable.h>
 #include <mach/dma.h>
 
 struct mcp_ops;
@@ -27,7 +26,7 @@ struct mcp {
 	dma_device_t	dma_telco_rd;
 	dma_device_t	dma_telco_wr;
 	struct device	attached_device;
-	const char	*codec;
+	int		gpio_base;
 };
 
 struct mcp_ops {
@@ -45,11 +44,10 @@ void mcp_reg_write(struct mcp *, unsigned int, unsigned int);
 unsigned int mcp_reg_read(struct mcp *, unsigned int);
 void mcp_enable(struct mcp *);
 void mcp_disable(struct mcp *);
-const struct mcp_device_id *mcp_get_device_id(const struct mcp *mcp);
 #define mcp_get_sclk_rate(mcp)	((mcp)->sclk_rate)
 
 struct mcp *mcp_host_alloc(struct device *, size_t);
-int mcp_host_register(struct mcp *, void *);
+int mcp_host_register(struct mcp *);
 void mcp_host_unregister(struct mcp *);
 
 struct mcp_driver {
@@ -58,7 +56,6 @@ struct mcp_driver {
 	void (*remove)(struct mcp *);
 	int (*suspend)(struct mcp *, pm_message_t);
 	int (*resume)(struct mcp *);
-	const struct mcp_device_id *id_table;
 };
 
 int mcp_driver_register(struct mcp_driver *);
diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h
index bc19e5fb7ea8..4321f044d1e4 100644
--- a/include/linux/mfd/ucb1x00.h
+++ b/include/linux/mfd/ucb1x00.h
@@ -104,9 +104,6 @@
 #define UCB_MODE_DYN_VFLAG_ENA	(1 << 12)
 #define UCB_MODE_AUD_OFF_CAN	(1 << 13)
 
-struct ucb1x00_plat_data {
-	int		gpio_base;
-};
 
 struct ucb1x00_irq {
 	void *devid;
@@ -119,7 +116,7 @@ struct ucb1x00 {
 	unsigned int		irq;
 	struct semaphore	adc_sem;
 	spinlock_t		io_lock;
-	const struct mcp_device_id *id;
+	u16			id;
 	u16			io_dir;
 	u16			io_out;
 	u16			adc_cr;
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index b29e7f6f8fa5..83ac0713ed0a 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -436,17 +436,6 @@ struct spi_device_id {
 			__attribute__((aligned(sizeof(kernel_ulong_t))));
 };
 
-/* mcp */
-
-#define MCP_NAME_SIZE	20
-#define MCP_MODULE_PREFIX "mcp:"
-
-struct mcp_device_id {
-	char name[MCP_NAME_SIZE];
-	kernel_ulong_t driver_data	/* Data private to the driver */
-			__attribute__((aligned(sizeof(kernel_ulong_t))));
-};
-
 /* dmi */
 enum dmi_field {
 	DMI_NONE,
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index c0e14b3f2306..e8c969577768 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -823,16 +823,6 @@ static int do_spi_entry(const char *filename, struct spi_device_id *id,
 }
 ADD_TO_DEVTABLE("spi", struct spi_device_id, do_spi_entry);
 
-/* Looks like: mcp:S */
-static int do_mcp_entry(const char *filename, struct mcp_device_id *id,
-			char *alias)
-{
-	sprintf(alias, MCP_MODULE_PREFIX "%s", id->name);
-
-	return 1;
-}
-ADD_TO_DEVTABLE("mcp", struct mcp_device_id, do_mcp_entry); 
-
 static const struct dmifield {
 	const char *prefix;
 	int field;
-- 
cgit v1.2.3


From 2a7f51a3e08cdaeea78d9e101a0079422a55bbc3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 21 Jan 2012 09:28:53 +0000
Subject: MFD: mcp-core: fix mcp_priv() to be more type safe

mcp_priv() does unexpected things when passed a void pointer.  Make it
a typed inline function, which ensures that it works correctly in
these cases.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/mfd/mcp.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h
index ee496708e38b..f88c1cc0cb0f 100644
--- a/include/linux/mfd/mcp.h
+++ b/include/linux/mfd/mcp.h
@@ -64,6 +64,9 @@ void mcp_driver_unregister(struct mcp_driver *);
 #define mcp_get_drvdata(mcp)	dev_get_drvdata(&(mcp)->attached_device)
 #define mcp_set_drvdata(mcp,d)	dev_set_drvdata(&(mcp)->attached_device, d)
 
-#define mcp_priv(mcp)		((void *)((mcp)+1))
+static inline void *mcp_priv(struct mcp *mcp)
+{
+	return mcp + 1;
+}
 
 #endif
-- 
cgit v1.2.3


From 93ece0c1a7ace88f10411dbb5643d2aa2fe00ebf Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.co.il>
Date: Thu, 19 Jan 2012 09:45:05 +0000
Subject: mlx4_en: eth statistics modification

In native mode display all available statistics.
In SRIOV mode on VF display only SW counters statistics,
in SRIOV mode on hypervisor display SW counters and errors (got from FW)
statistics.

Signed-off-by: Eugenia Emantayev <eugenia@mellanox.co.il>
Reviewed-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 66 +++++++++++++++++++------
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c  |  2 +
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h    |  1 +
 drivers/net/ethernet/mellanox/mlx4/port.c       | 21 ++++++++
 include/linux/mlx4/device.h                     |  1 +
 5 files changed, 75 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 53c66869aecd..70346fd7f9c4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -183,10 +183,11 @@ static int mlx4_en_set_wol(struct net_device *netdev,
 static int mlx4_en_get_sset_count(struct net_device *dev, int sset)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
+	int bit_count = hweight64(priv->stats_bitmap);
 
 	switch (sset) {
 	case ETH_SS_STATS:
-		return NUM_ALL_STATS +
+		return (priv->stats_bitmap ? bit_count : NUM_ALL_STATS) +
 			(priv->tx_ring_num + priv->rx_ring_num) * 2;
 	case ETH_SS_TEST:
 		return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags
@@ -201,14 +202,34 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	int index = 0;
-	int i;
+	int i, j = 0;
 
 	spin_lock_bh(&priv->stats_lock);
 
-	for (i = 0; i < NUM_MAIN_STATS; i++)
-		data[index++] = ((unsigned long *) &priv->stats)[i];
-	for (i = 0; i < NUM_PORT_STATS; i++)
-		data[index++] = ((unsigned long *) &priv->port_stats)[i];
+	if (!(priv->stats_bitmap)) {
+		for (i = 0; i < NUM_MAIN_STATS; i++)
+			data[index++] =
+				((unsigned long *) &priv->stats)[i];
+		for (i = 0; i < NUM_PORT_STATS; i++)
+			data[index++] =
+				((unsigned long *) &priv->port_stats)[i];
+		for (i = 0; i < NUM_PKT_STATS; i++)
+			data[index++] =
+				((unsigned long *) &priv->pkstats)[i];
+	} else {
+		for (i = 0; i < NUM_MAIN_STATS; i++) {
+			if ((priv->stats_bitmap >> j) & 1)
+				data[index++] =
+				((unsigned long *) &priv->stats)[i];
+			j++;
+		}
+		for (i = 0; i < NUM_PORT_STATS; i++) {
+			if ((priv->stats_bitmap >> j) & 1)
+				data[index++] =
+				((unsigned long *) &priv->port_stats)[i];
+			j++;
+		}
+	}
 	for (i = 0; i < priv->tx_ring_num; i++) {
 		data[index++] = priv->tx_ring[i].packets;
 		data[index++] = priv->tx_ring[i].bytes;
@@ -217,8 +238,6 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
 		data[index++] = priv->rx_ring[i].packets;
 		data[index++] = priv->rx_ring[i].bytes;
 	}
-	for (i = 0; i < NUM_PKT_STATS; i++)
-		data[index++] = ((unsigned long *) &priv->pkstats)[i];
 	spin_unlock_bh(&priv->stats_lock);
 
 }
@@ -247,11 +266,29 @@ static void mlx4_en_get_strings(struct net_device *dev,
 
 	case ETH_SS_STATS:
 		/* Add main counters */
-		for (i = 0; i < NUM_MAIN_STATS; i++)
-			strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i]);
-		for (i = 0; i< NUM_PORT_STATS; i++)
-			strcpy(data + (index++) * ETH_GSTRING_LEN,
-			main_strings[i + NUM_MAIN_STATS]);
+		if (!priv->stats_bitmap) {
+			for (i = 0; i < NUM_MAIN_STATS; i++)
+				strcpy(data + (index++) * ETH_GSTRING_LEN,
+					main_strings[i]);
+			for (i = 0; i < NUM_PORT_STATS; i++)
+				strcpy(data + (index++) * ETH_GSTRING_LEN,
+					main_strings[i +
+					NUM_MAIN_STATS]);
+			for (i = 0; i < NUM_PKT_STATS; i++)
+				strcpy(data + (index++) * ETH_GSTRING_LEN,
+					main_strings[i +
+					NUM_MAIN_STATS +
+					NUM_PORT_STATS]);
+		} else
+			for (i = 0; i < NUM_MAIN_STATS + NUM_PORT_STATS; i++) {
+				if ((priv->stats_bitmap >> i) & 1) {
+					strcpy(data +
+					       (index++) * ETH_GSTRING_LEN,
+					       main_strings[i]);
+				}
+				if (!(priv->stats_bitmap >> i))
+					break;
+			}
 		for (i = 0; i < priv->tx_ring_num; i++) {
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
 				"tx%d_packets", i);
@@ -264,9 +301,6 @@ static void mlx4_en_get_strings(struct net_device *dev,
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
 				"rx%d_bytes", i);
 		}
-		for (i = 0; i< NUM_PKT_STATS; i++)
-			strcpy(data + (index++) * ETH_GSTRING_LEN,
-			main_strings[i + NUM_MAIN_STATS + NUM_PORT_STATS]);
 		break;
 	}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index be3f4156aaab..467ae5824875 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -702,6 +702,8 @@ int mlx4_en_start_port(struct net_device *dev)
 	/* Schedule multicast task to populate multicast list */
 	queue_work(mdev->workqueue, &priv->mcast_task);
 
+	mlx4_set_stats_bitmap(mdev->dev, &priv->stats_bitmap);
+
 	priv->port_up = true;
 	netif_tx_start_all_queues(dev);
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index f4d189a1290e..35f08840813c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -476,6 +476,7 @@ struct mlx4_en_priv {
 	struct mlx4_en_perf_stats pstats;
 	struct mlx4_en_pkt_stats pkstats;
 	struct mlx4_en_port_stats port_stats;
+	u64 stats_bitmap;
 	char *mc_addrs;
 	int mc_addrs_cnt;
 	struct mlx4_en_stat_out_mbox hw_stats;
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 1a551d69ddcb..f44ae555bf43 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -44,6 +44,11 @@
 #define MLX4_VLAN_VALID		(1u << 31)
 #define MLX4_VLAN_MASK		0xfff
 
+#define MLX4_STATS_TRAFFIC_COUNTERS_MASK	0xfULL
+#define MLX4_STATS_TRAFFIC_DROPS_MASK		0xc0ULL
+#define MLX4_STATS_ERROR_COUNTERS_MASK		0x1ffc30ULL
+#define MLX4_STATS_PORT_COUNTERS_MASK		0x1fe00000ULL
+
 void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table)
 {
 	int i;
@@ -903,3 +908,19 @@ int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
 	return mlx4_common_dump_eth_stats(dev, slave,
 					  vhcr->in_modifier, outbox);
 }
+
+void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap)
+{
+	if (!mlx4_is_mfunc(dev)) {
+		*stats_bitmap = 0;
+		return;
+	}
+
+	*stats_bitmap = (MLX4_STATS_TRAFFIC_COUNTERS_MASK |
+			 MLX4_STATS_TRAFFIC_DROPS_MASK |
+			 MLX4_STATS_PORT_COUNTERS_MASK);
+
+	if (mlx4_is_master(dev))
+		*stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK;
+}
+EXPORT_SYMBOL(mlx4_set_stats_bitmap);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 5c4fe8e5bfe5..aea61905499b 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -621,6 +621,7 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
 int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
 int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn);
 void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn);
+void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap);
 
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
-- 
cgit v1.2.3


From 974c12360dfe6ab01201fe9e708e7755c413f8b6 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 19 Jan 2012 14:42:21 +0000
Subject: tcp: detect loss above high_seq in recovery

Correctly implement a loss detection heuristic: New sequences (above
high_seq) sent during the fast recovery are deemed lost when higher
sequences are SACKed.

Current code does not catch these losses, because tcp_mark_head_lost()
does not check packets beyond high_seq. The fix is straight-forward by
checking packets until the highest sacked packet. In addition, all the
FLAG_DATA_LOST logic is ineffective and redundant and can be removed.

Update the loss heuristic comments. The algorithm above is documented
as heuristic B, but it is redundant too because heuristic A already
covers B.

Note that this change only marks some forward-retransmitted packets LOST.
It does NOT forbid TCP performing further CWR on new losses. A potential
follow-up patch under preparation is to perform another CWR on "new"
losses such as
1) sequence above high_seq is lost (by resetting high_seq to snd_nxt)
2) retransmission is lost.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h |  1 -
 net/ipv4/proc.c      |  1 -
 net/ipv4/tcp_input.c | 41 +++++++++++++++--------------------------
 3 files changed, 15 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index e16557a357e5..c1241c428179 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -192,7 +192,6 @@ enum
 	LINUX_MIB_TCPPARTIALUNDO,		/* TCPPartialUndo */
 	LINUX_MIB_TCPDSACKUNDO,			/* TCPDSACKUndo */
 	LINUX_MIB_TCPLOSSUNDO,			/* TCPLossUndo */
-	LINUX_MIB_TCPLOSS,			/* TCPLoss */
 	LINUX_MIB_TCPLOSTRETRANSMIT,		/* TCPLostRetransmit */
 	LINUX_MIB_TCPRENOFAILURES,		/* TCPRenoFailures */
 	LINUX_MIB_TCPSACKFAILURES,		/* TCPSackFailures */
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3569d8ecaeac..6afc807ee2ad 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -216,7 +216,6 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO),
 	SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO),
 	SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO),
-	SNMP_MIB_ITEM("TCPLoss", LINUX_MIB_TCPLOSS),
 	SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT),
 	SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES),
 	SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2877c3e09587..976034f82320 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
 #define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
 #define FLAG_ECE		0x40 /* ECE in this ACK				*/
-#define FLAG_DATA_LOST		0x80 /* SACK detected data lossage.		*/
 #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
 #define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
@@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
  * These 6 states form finite state machine, controlled by the following events:
  * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
  * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
- * 3. Loss detection event of one of three flavors:
+ * 3. Loss detection event of two flavors:
  *	A. Scoreboard estimator decided the packet is lost.
  *	   A'. Reno "three dupacks" marks head of queue lost.
- *	   A''. Its FACK modfication, head until snd.fack is lost.
- *	B. SACK arrives sacking data transmitted after never retransmitted
- *	   hole was sent out.
- *	C. SACK arrives sacking SND.NXT at the moment, when the
+ *	   A''. Its FACK modification, head until snd.fack is lost.
+ *	B. SACK arrives sacking SND.NXT at the moment, when the
  *	   segment was retransmitted.
  * 4. D-SACK added new rule: D-SACK changes any tag to S.
  *
@@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
 }
 
 /* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
- * Event "C". Later note: FACK people cheated me again 8), we have to account
+ * Event "B". Later note: FACK people cheated me again 8), we have to account
  * for reordering! Ugly, but should help.
  *
  * Search retransmitted skbs from write_queue that were sent when snd_nxt was
@@ -1844,10 +1841,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 		if (found_dup_sack && ((i + 1) == first_sack_index))
 			next_dup = &sp[i + 1];
 
-		/* Event "B" in the comment above. */
-		if (after(end_seq, tp->high_seq))
-			state.flag |= FLAG_DATA_LOST;
-
 		/* Skip too early cached blocks */
 		while (tcp_sack_cache_ok(tp, cache) &&
 		       !before(start_seq, cache->end_seq))
@@ -2515,8 +2508,11 @@ static void tcp_timeout_skbs(struct sock *sk)
 	tcp_verify_left_out(tp);
 }
 
-/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
- * is against sacked "cnt", otherwise it's against facked "cnt"
+/* Detect loss in event "A" above by marking head of queue up as lost.
+ * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+ * are considered lost. For RFC3517 SACK, a segment is considered lost if it
+ * has at least tp->reordering SACKed seqments above it; "packets" refers to
+ * the maximum SACKed segments to pass before reaching this limit.
  */
 static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
@@ -2525,6 +2521,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 	int cnt, oldcnt;
 	int err;
 	unsigned int mss;
+	/* Use SACK to deduce losses of new sequences sent during recovery */
+	const u32 loss_high = tcp_is_sack(tp) ?  tp->snd_nxt : tp->high_seq;
 
 	WARN_ON(packets > tp->packets_out);
 	if (tp->lost_skb_hint) {
@@ -2546,7 +2544,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 		tp->lost_skb_hint = skb;
 		tp->lost_cnt_hint = cnt;
 
-		if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
 			break;
 
 		oldcnt = cnt;
@@ -3033,19 +3031,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 	if (tcp_check_sack_reneging(sk, flag))
 		return;
 
-	/* C. Process data loss notification, provided it is valid. */
-	if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
-	    before(tp->snd_una, tp->high_seq) &&
-	    icsk->icsk_ca_state != TCP_CA_Open &&
-	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
-	}
-
-	/* D. Check consistency of the current state. */
+	/* C. Check consistency of the current state. */
 	tcp_verify_left_out(tp);
 
-	/* E. Check state exit conditions. State can be terminated
+	/* D. Check state exit conditions. State can be terminated
 	 *    when high_seq is ACKed. */
 	if (icsk->icsk_ca_state == TCP_CA_Open) {
 		WARN_ON(tp->retrans_out != 0);
@@ -3077,7 +3066,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		}
 	}
 
-	/* F. Process state. */
+	/* E. Process state. */
 	switch (icsk->icsk_ca_state) {
 	case TCP_CA_Recovery:
 		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
-- 
cgit v1.2.3


From 8cfd14ad1eb52e44cb1fe7b47a68126e45e04026 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Fri, 20 Jan 2012 04:57:15 +0000
Subject: cgroup: make sure memcg margin is 0 when over limit

For the memcg sock code, we'll need to register allocations
that are temporarily over limit. Let's make sure that margin
is 0 in this case.

I am keeping this as a separate patch, so that if any weird
interaction appears in the future, we can know exactly what caused
it.

Suggested by Johannes Weiner

Signed-off-by: Glauber Costa <glommer@parallels.com>
CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
CC: Johannes Weiner <hannes@cmpxchg.org>
CC: Michal Hocko <mhocko@suse.cz>
CC: Tejun Heo <tj@kernel.org>
CC: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/res_counter.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index c9d625ca659e..d06d014afda6 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -142,7 +142,10 @@ static inline unsigned long long res_counter_margin(struct res_counter *cnt)
 	unsigned long flags;
 
 	spin_lock_irqsave(&cnt->lock, flags);
-	margin = cnt->limit - cnt->usage;
+	if (cnt->limit > cnt->usage)
+		margin = cnt->limit - cnt->usage;
+	else
+		margin = 0;
 	spin_unlock_irqrestore(&cnt->lock, flags);
 	return margin;
 }
-- 
cgit v1.2.3


From 0e90b31f4ba77027a7c21cbfc66404df0851ca21 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Fri, 20 Jan 2012 04:57:16 +0000
Subject: net: introduce res_counter_charge_nofail() for socket allocations

There is a case in __sk_mem_schedule(), where an allocation
is beyond the maximum, but yet we are allowed to proceed.
It happens under the following condition:

	sk->sk_wmem_queued + size >= sk->sk_sndbuf

The network code won't revert the allocation in this case,
meaning that at some point later it'll try to do it. Since
this is never communicated to the underlying res_counter
code, there is an imbalance in the res_counter uncharge operation.

I see two ways of fixing this:

1) storing the information about those allocations somewhere
   in memcg, and then deducting from that first, before
   we start draining the res_counter,
2) providing a slightly different allocation function for
   the res_counter, that matches the original behavior of
   the network code more closely.

I decided to go for #2 here, believing it to be more elegant,
since #1 would require us to do basically that, but in a more
obscure way.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
CC: Tejun Heo <tj@kernel.org>
CC: Li Zefan <lizf@cn.fujitsu.com>
CC: Laurent Chavey <chavey@google.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/res_counter.h |  6 ++++++
 include/net/sock.h          | 10 ++++------
 kernel/res_counter.c        | 25 +++++++++++++++++++++++++
 net/core/sock.c             |  4 ++--
 4 files changed, 37 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index d06d014afda6..da81af086eaf 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -109,12 +109,18 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent);
  *
  * returns 0 on success and <0 if the counter->usage will exceed the
  * counter->limit _locked call expects the counter->lock to be taken
+ *
+ * charge_nofail works the same, except that it charges the resource
+ * counter unconditionally, and returns < 0 if, after the current
+ * charge, we are over limit.
  */
 
 int __must_check res_counter_charge_locked(struct res_counter *counter,
 		unsigned long val);
 int __must_check res_counter_charge(struct res_counter *counter,
 		unsigned long val, struct res_counter **limit_fail_at);
+int __must_check res_counter_charge_nofail(struct res_counter *counter,
+		unsigned long val, struct res_counter **limit_fail_at);
 
 /*
  * uncharge - tell that some portion of the resource is released
diff --git a/include/net/sock.h b/include/net/sock.h
index 0e7a9b05f92b..4c69ac165e6b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1008,9 +1008,8 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot,
 	struct res_counter *fail;
 	int ret;
 
-	ret = res_counter_charge(prot->memory_allocated,
-				 amt << PAGE_SHIFT, &fail);
-
+	ret = res_counter_charge_nofail(prot->memory_allocated,
+					amt << PAGE_SHIFT, &fail);
 	if (ret < 0)
 		*parent_status = OVER_LIMIT;
 }
@@ -1054,12 +1053,11 @@ sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
 }
 
 static inline void
-sk_memory_allocated_sub(struct sock *sk, int amt, int parent_status)
+sk_memory_allocated_sub(struct sock *sk, int amt)
 {
 	struct proto *prot = sk->sk_prot;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
-	    parent_status != OVER_LIMIT) /* Otherwise was uncharged already */
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
 		memcg_memory_allocated_sub(sk->sk_cgrp, amt);
 
 	atomic_long_sub(amt, prot->memory_allocated);
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 6d269cce7aa1..d508363858b3 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -66,6 +66,31 @@ done:
 	return ret;
 }
 
+int res_counter_charge_nofail(struct res_counter *counter, unsigned long val,
+			      struct res_counter **limit_fail_at)
+{
+	int ret, r;
+	unsigned long flags;
+	struct res_counter *c;
+
+	r = ret = 0;
+	*limit_fail_at = NULL;
+	local_irq_save(flags);
+	for (c = counter; c != NULL; c = c->parent) {
+		spin_lock(&c->lock);
+		r = res_counter_charge_locked(c, val);
+		if (r)
+			c->usage += val;
+		spin_unlock(&c->lock);
+		if (r < 0 && ret == 0) {
+			*limit_fail_at = c;
+			ret = r;
+		}
+	}
+	local_irq_restore(flags);
+
+	return ret;
+}
 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 {
 	if (WARN_ON(counter->usage < val))
diff --git a/net/core/sock.c b/net/core/sock.c
index 5c5af9988f94..3e81fd2e3c75 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1827,7 +1827,7 @@ suppress_allocation:
 	/* Alas. Undo changes. */
 	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
 
-	sk_memory_allocated_sub(sk, amt, parent_status);
+	sk_memory_allocated_sub(sk, amt);
 
 	return 0;
 }
@@ -1840,7 +1840,7 @@ EXPORT_SYMBOL(__sk_mem_schedule);
 void __sk_mem_reclaim(struct sock *sk)
 {
 	sk_memory_allocated_sub(sk,
-				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 0);
+				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
 	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
 
 	if (sk_under_memory_pressure(sk) &&
-- 
cgit v1.2.3


From e9c688a3272fd4b659228f3880de8109a94540e2 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Sun, 22 Jan 2012 14:31:15 -0700
Subject: driver core: remove drivers/base/sys.c and include/linux/sysdev.h

Now that all users of 'struct sysdev' are removed from the kernel, we
can safely remove the .h and .c files for this code, to ensure that no
one accidentally starts to use it again.

Many thanks to Kay, who did all the hard work here on making this
happen.

Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/Makefile  |   2 +-
 drivers/base/sys.c     | 383 -------------------------------------------------
 include/linux/sysdev.h | 164 ---------------------
 3 files changed, 1 insertion(+), 548 deletions(-)
 delete mode 100644 drivers/base/sys.c
 delete mode 100644 include/linux/sysdev.h

(limited to 'include/linux')

diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 2c8272dd93c4..610f9997a403 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -1,6 +1,6 @@
 # Makefile for the Linux device tree
 
-obj-y			:= core.o sys.o bus.o dd.o syscore.o \
+obj-y			:= core.o bus.o dd.o syscore.o \
 			   driver.o class.o platform.o \
 			   cpu.o firmware.o init.o map.o devres.o \
 			   attribute_container.o transport_class.o \
diff --git a/drivers/base/sys.c b/drivers/base/sys.c
deleted file mode 100644
index 409f5ce78829..000000000000
--- a/drivers/base/sys.c
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * sys.c - pseudo-bus for system 'devices' (cpus, PICs, timers, etc)
- *
- * Copyright (c) 2002-3 Patrick Mochel
- *               2002-3 Open Source Development Lab
- *
- * This file is released under the GPLv2
- *
- * This exports a 'system' bus type.
- * By default, a 'sys' bus gets added to the root of the system. There will
- * always be core system devices. Devices can use sysdev_register() to
- * add themselves as children of the system bus.
- */
-
-#include <linux/sysdev.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/pm.h>
-#include <linux/device.h>
-#include <linux/mutex.h>
-#include <linux/interrupt.h>
-
-#include "base.h"
-
-#define to_sysdev(k) container_of(k, struct sys_device, kobj)
-#define to_sysdev_attr(a) container_of(a, struct sysdev_attribute, attr)
-
-
-static ssize_t
-sysdev_show(struct kobject *kobj, struct attribute *attr, char *buffer)
-{
-	struct sys_device *sysdev = to_sysdev(kobj);
-	struct sysdev_attribute *sysdev_attr = to_sysdev_attr(attr);
-
-	if (sysdev_attr->show)
-		return sysdev_attr->show(sysdev, sysdev_attr, buffer);
-	return -EIO;
-}
-
-
-static ssize_t
-sysdev_store(struct kobject *kobj, struct attribute *attr,
-	     const char *buffer, size_t count)
-{
-	struct sys_device *sysdev = to_sysdev(kobj);
-	struct sysdev_attribute *sysdev_attr = to_sysdev_attr(attr);
-
-	if (sysdev_attr->store)
-		return sysdev_attr->store(sysdev, sysdev_attr, buffer, count);
-	return -EIO;
-}
-
-static const struct sysfs_ops sysfs_ops = {
-	.show	= sysdev_show,
-	.store	= sysdev_store,
-};
-
-static struct kobj_type ktype_sysdev = {
-	.sysfs_ops	= &sysfs_ops,
-};
-
-
-int sysdev_create_file(struct sys_device *s, struct sysdev_attribute *a)
-{
-	return sysfs_create_file(&s->kobj, &a->attr);
-}
-
-
-void sysdev_remove_file(struct sys_device *s, struct sysdev_attribute *a)
-{
-	sysfs_remove_file(&s->kobj, &a->attr);
-}
-
-EXPORT_SYMBOL_GPL(sysdev_create_file);
-EXPORT_SYMBOL_GPL(sysdev_remove_file);
-
-#define to_sysdev_class(k) container_of(k, struct sysdev_class, kset.kobj)
-#define to_sysdev_class_attr(a) container_of(a, \
-	struct sysdev_class_attribute, attr)
-
-static ssize_t sysdev_class_show(struct kobject *kobj, struct attribute *attr,
-				 char *buffer)
-{
-	struct sysdev_class *class = to_sysdev_class(kobj);
-	struct sysdev_class_attribute *class_attr = to_sysdev_class_attr(attr);
-
-	if (class_attr->show)
-		return class_attr->show(class, class_attr, buffer);
-	return -EIO;
-}
-
-static ssize_t sysdev_class_store(struct kobject *kobj, struct attribute *attr,
-				  const char *buffer, size_t count)
-{
-	struct sysdev_class *class = to_sysdev_class(kobj);
-	struct sysdev_class_attribute *class_attr = to_sysdev_class_attr(attr);
-
-	if (class_attr->store)
-		return class_attr->store(class, class_attr, buffer, count);
-	return -EIO;
-}
-
-static const struct sysfs_ops sysfs_class_ops = {
-	.show	= sysdev_class_show,
-	.store	= sysdev_class_store,
-};
-
-static struct kobj_type ktype_sysdev_class = {
-	.sysfs_ops	= &sysfs_class_ops,
-};
-
-int sysdev_class_create_file(struct sysdev_class *c,
-			     struct sysdev_class_attribute *a)
-{
-	return sysfs_create_file(&c->kset.kobj, &a->attr);
-}
-EXPORT_SYMBOL_GPL(sysdev_class_create_file);
-
-void sysdev_class_remove_file(struct sysdev_class *c,
-			      struct sysdev_class_attribute *a)
-{
-	sysfs_remove_file(&c->kset.kobj, &a->attr);
-}
-EXPORT_SYMBOL_GPL(sysdev_class_remove_file);
-
-extern struct kset *system_kset;
-
-int sysdev_class_register(struct sysdev_class *cls)
-{
-	int retval;
-
-	pr_debug("Registering sysdev class '%s'\n", cls->name);
-
-	INIT_LIST_HEAD(&cls->drivers);
-	memset(&cls->kset.kobj, 0x00, sizeof(struct kobject));
-	cls->kset.kobj.parent = &system_kset->kobj;
-	cls->kset.kobj.ktype = &ktype_sysdev_class;
-	cls->kset.kobj.kset = system_kset;
-
-	retval = kobject_set_name(&cls->kset.kobj, "%s", cls->name);
-	if (retval)
-		return retval;
-
-	retval = kset_register(&cls->kset);
-	if (!retval && cls->attrs)
-		retval = sysfs_create_files(&cls->kset.kobj,
-					    (const struct attribute **)cls->attrs);
-	return retval;
-}
-
-void sysdev_class_unregister(struct sysdev_class *cls)
-{
-	pr_debug("Unregistering sysdev class '%s'\n",
-		 kobject_name(&cls->kset.kobj));
-	if (cls->attrs)
-		sysfs_remove_files(&cls->kset.kobj,
-				   (const struct attribute **)cls->attrs);
-	kset_unregister(&cls->kset);
-}
-
-EXPORT_SYMBOL_GPL(sysdev_class_register);
-EXPORT_SYMBOL_GPL(sysdev_class_unregister);
-
-static DEFINE_MUTEX(sysdev_drivers_lock);
-
-/*
- * @dev != NULL means that we're unwinding because some drv->add()
- * failed for some reason. You need to grab sysdev_drivers_lock before
- * calling this.
- */
-static void __sysdev_driver_remove(struct sysdev_class *cls,
-				   struct sysdev_driver *drv,
-				   struct sys_device *from_dev)
-{
-	struct sys_device *dev = from_dev;
-
-	list_del_init(&drv->entry);
-	if (!cls)
-		return;
-
-	if (!drv->remove)
-		goto kset_put;
-
-	if (dev)
-		list_for_each_entry_continue_reverse(dev, &cls->kset.list,
-						     kobj.entry)
-			drv->remove(dev);
-	else
-		list_for_each_entry(dev, &cls->kset.list, kobj.entry)
-			drv->remove(dev);
-
-kset_put:
-	kset_put(&cls->kset);
-}
-
-/**
- *	sysdev_driver_register - Register auxiliary driver
- *	@cls:	Device class driver belongs to.
- *	@drv:	Driver.
- *
- *	@drv is inserted into @cls->drivers to be
- *	called on each operation on devices of that class. The refcount
- *	of @cls is incremented.
- */
-int sysdev_driver_register(struct sysdev_class *cls, struct sysdev_driver *drv)
-{
-	struct sys_device *dev = NULL;
-	int err = 0;
-
-	if (!cls) {
-		WARN(1, KERN_WARNING "sysdev: invalid class passed to %s!\n",
-			__func__);
-		return -EINVAL;
-	}
-
-	/* Check whether this driver has already been added to a class. */
-	if (drv->entry.next && !list_empty(&drv->entry))
-		WARN(1, KERN_WARNING "sysdev: class %s: driver (%p) has already"
-			" been registered to a class, something is wrong, but "
-			"will forge on!\n", cls->name, drv);
-
-	mutex_lock(&sysdev_drivers_lock);
-	if (cls && kset_get(&cls->kset)) {
-		list_add_tail(&drv->entry, &cls->drivers);
-
-		/* If devices of this class already exist, tell the driver */
-		if (drv->add) {
-			list_for_each_entry(dev, &cls->kset.list, kobj.entry) {
-				err = drv->add(dev);
-				if (err)
-					goto unwind;
-			}
-		}
-	} else {
-		err = -EINVAL;
-		WARN(1, KERN_ERR "%s: invalid device class\n", __func__);
-	}
-
-	goto unlock;
-
-unwind:
-	__sysdev_driver_remove(cls, drv, dev);
-
-unlock:
-	mutex_unlock(&sysdev_drivers_lock);
-	return err;
-}
-
-/**
- *	sysdev_driver_unregister - Remove an auxiliary driver.
- *	@cls:	Class driver belongs to.
- *	@drv:	Driver.
- */
-void sysdev_driver_unregister(struct sysdev_class *cls,
-			      struct sysdev_driver *drv)
-{
-	mutex_lock(&sysdev_drivers_lock);
-	__sysdev_driver_remove(cls, drv, NULL);
-	mutex_unlock(&sysdev_drivers_lock);
-}
-EXPORT_SYMBOL_GPL(sysdev_driver_register);
-EXPORT_SYMBOL_GPL(sysdev_driver_unregister);
-
-/**
- *	sysdev_register - add a system device to the tree
- *	@sysdev:	device in question
- *
- */
-int sysdev_register(struct sys_device *sysdev)
-{
-	int error;
-	struct sysdev_class *cls = sysdev->cls;
-
-	if (!cls)
-		return -EINVAL;
-
-	pr_debug("Registering sys device of class '%s'\n",
-		 kobject_name(&cls->kset.kobj));
-
-	/* initialize the kobject to 0, in case it had previously been used */
-	memset(&sysdev->kobj, 0x00, sizeof(struct kobject));
-
-	/* Make sure the kset is set */
-	sysdev->kobj.kset = &cls->kset;
-
-	/* Register the object */
-	error = kobject_init_and_add(&sysdev->kobj, &ktype_sysdev, NULL,
-				     "%s%d", kobject_name(&cls->kset.kobj),
-				     sysdev->id);
-
-	if (!error) {
-		struct sysdev_driver *drv;
-
-		pr_debug("Registering sys device '%s'\n",
-			 kobject_name(&sysdev->kobj));
-
-		mutex_lock(&sysdev_drivers_lock);
-		/* Generic notification is implicit, because it's that
-		 * code that should have called us.
-		 */
-
-		/* Notify class auxiliary drivers */
-		list_for_each_entry(drv, &cls->drivers, entry) {
-			if (drv->add)
-				drv->add(sysdev);
-		}
-		mutex_unlock(&sysdev_drivers_lock);
-		kobject_uevent(&sysdev->kobj, KOBJ_ADD);
-	}
-
-	return error;
-}
-
-void sysdev_unregister(struct sys_device *sysdev)
-{
-	struct sysdev_driver *drv;
-
-	mutex_lock(&sysdev_drivers_lock);
-	list_for_each_entry(drv, &sysdev->cls->drivers, entry) {
-		if (drv->remove)
-			drv->remove(sysdev);
-	}
-	mutex_unlock(&sysdev_drivers_lock);
-
-	kobject_put(&sysdev->kobj);
-}
-
-EXPORT_SYMBOL_GPL(sysdev_register);
-EXPORT_SYMBOL_GPL(sysdev_unregister);
-
-#define to_ext_attr(x) container_of(x, struct sysdev_ext_attribute, attr)
-
-ssize_t sysdev_store_ulong(struct sys_device *sysdev,
-			   struct sysdev_attribute *attr,
-			   const char *buf, size_t size)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	char *end;
-	unsigned long new = simple_strtoul(buf, &end, 0);
-	if (end == buf)
-		return -EINVAL;
-	*(unsigned long *)(ea->var) = new;
-	/* Always return full write size even if we didn't consume all */
-	return size;
-}
-EXPORT_SYMBOL_GPL(sysdev_store_ulong);
-
-ssize_t sysdev_show_ulong(struct sys_device *sysdev,
-			  struct sysdev_attribute *attr,
-			  char *buf)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	return snprintf(buf, PAGE_SIZE, "%lx\n", *(unsigned long *)(ea->var));
-}
-EXPORT_SYMBOL_GPL(sysdev_show_ulong);
-
-ssize_t sysdev_store_int(struct sys_device *sysdev,
-			   struct sysdev_attribute *attr,
-			   const char *buf, size_t size)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	char *end;
-	long new = simple_strtol(buf, &end, 0);
-	if (end == buf || new > INT_MAX || new < INT_MIN)
-		return -EINVAL;
-	*(int *)(ea->var) = new;
-	/* Always return full write size even if we didn't consume all */
-	return size;
-}
-EXPORT_SYMBOL_GPL(sysdev_store_int);
-
-ssize_t sysdev_show_int(struct sys_device *sysdev,
-			  struct sysdev_attribute *attr,
-			  char *buf)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	return snprintf(buf, PAGE_SIZE, "%d\n", *(int *)(ea->var));
-}
-EXPORT_SYMBOL_GPL(sysdev_show_int);
-
diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h
deleted file mode 100644
index 20f63d3e6144..000000000000
--- a/include/linux/sysdev.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/**
- * System devices follow a slightly different driver model. 
- * They don't need to do dynammic driver binding, can't be probed, 
- * and don't reside on any type of peripheral bus. 
- * So, we represent and treat them a little differently.
- * 
- * We still have a notion of a driver for a system device, because we still
- * want to perform basic operations on these devices. 
- *
- * We also support auxiliary drivers binding to devices of a certain class.
- * 
- * This allows configurable drivers to register themselves for devices of
- * a certain type. And, it allows class definitions to reside in generic
- * code while arch-specific code can register specific drivers.
- *
- * Auxiliary drivers registered with a NULL cls are registered as drivers
- * for all system devices, and get notification calls for each device. 
- */
-
-
-#ifndef _SYSDEV_H_
-#define _SYSDEV_H_
-
-#include <linux/kobject.h>
-#include <linux/pm.h>
-
-
-struct sys_device;
-struct sysdev_class_attribute;
-
-struct sysdev_class {
-	const char *name;
-	struct list_head	drivers;
-	struct sysdev_class_attribute **attrs;
-	struct kset		kset;
-};
-
-struct sysdev_class_attribute {
-	struct attribute attr;
-	ssize_t (*show)(struct sysdev_class *, struct sysdev_class_attribute *,
-			char *);
-	ssize_t (*store)(struct sysdev_class *, struct sysdev_class_attribute *,
-			 const char *, size_t);
-};
-
-#define _SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) 		\
-{					 			\
-	.attr = {.name = __stringify(_name), .mode = _mode },	\
-	.show	= _show,					\
-	.store	= _store,					\
-}
-
-#define SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) 		\
-	struct sysdev_class_attribute attr_##_name = 		\
-		_SYSDEV_CLASS_ATTR(_name,_mode,_show,_store)
-
-
-extern int sysdev_class_register(struct sysdev_class *);
-extern void sysdev_class_unregister(struct sysdev_class *);
-
-extern int sysdev_class_create_file(struct sysdev_class *,
-	struct sysdev_class_attribute *);
-extern void sysdev_class_remove_file(struct sysdev_class *,
-	struct sysdev_class_attribute *);
-/**
- * Auxiliary system device drivers.
- */
-
-struct sysdev_driver {
-	struct list_head	entry;
-	int	(*add)(struct sys_device *);
-	int	(*remove)(struct sys_device *);
-};
-
-
-extern int sysdev_driver_register(struct sysdev_class *, struct sysdev_driver *);
-extern void sysdev_driver_unregister(struct sysdev_class *, struct sysdev_driver *);
-
-
-/**
- * sys_devices can be simplified a lot from regular devices, because they're
- * simply not as versatile. 
- */
-
-struct sys_device {
-	u32		id;
-	struct sysdev_class	* cls;
-	struct kobject		kobj;
-};
-
-extern int sysdev_register(struct sys_device *);
-extern void sysdev_unregister(struct sys_device *);
-
-
-struct sysdev_attribute { 
-	struct attribute	attr;
-	ssize_t (*show)(struct sys_device *, struct sysdev_attribute *, char *);
-	ssize_t (*store)(struct sys_device *, struct sysdev_attribute *,
-			 const char *, size_t);
-};
-
-
-#define _SYSDEV_ATTR(_name, _mode, _show, _store)		\
-{								\
-	.attr = { .name = __stringify(_name), .mode = _mode },	\
-	.show	= _show,					\
-	.store	= _store,					\
-}
-
-#define SYSDEV_ATTR(_name, _mode, _show, _store)		\
-	struct sysdev_attribute attr_##_name =			\
-		_SYSDEV_ATTR(_name, _mode, _show, _store);
-
-extern int sysdev_create_file(struct sys_device *, struct sysdev_attribute *);
-extern void sysdev_remove_file(struct sys_device *, struct sysdev_attribute *);
-
-/* Create/remove NULL terminated attribute list */
-static inline int
-sysdev_create_files(struct sys_device *d, struct sysdev_attribute **a)
-{
-	return sysfs_create_files(&d->kobj, (const struct attribute **)a);
-}
-
-static inline void
-sysdev_remove_files(struct sys_device *d, struct sysdev_attribute **a)
-{
-	return sysfs_remove_files(&d->kobj, (const struct attribute **)a);
-}
-
-struct sysdev_ext_attribute {
-	struct sysdev_attribute attr;
-	void *var;
-};
-
-/*
- * Support for simple variable sysdev attributes.
- * The pointer to the variable is stored in a sysdev_ext_attribute
- */
-
-/* Add more types as needed */
-
-extern ssize_t sysdev_show_ulong(struct sys_device *, struct sysdev_attribute *,
-				char *);
-extern ssize_t sysdev_store_ulong(struct sys_device *,
-			struct sysdev_attribute *, const char *, size_t);
-extern ssize_t sysdev_show_int(struct sys_device *, struct sysdev_attribute *,
-				char *);
-extern ssize_t sysdev_store_int(struct sys_device *,
-			struct sysdev_attribute *, const char *, size_t);
-
-#define _SYSDEV_ULONG_ATTR(_name, _mode, _var)				\
-	{ _SYSDEV_ATTR(_name, _mode, sysdev_show_ulong, sysdev_store_ulong), \
-	  &(_var) }
-#define SYSDEV_ULONG_ATTR(_name, _mode, _var)			\
-	struct sysdev_ext_attribute attr_##_name = 		\
-		_SYSDEV_ULONG_ATTR(_name, _mode, _var);
-#define _SYSDEV_INT_ATTR(_name, _mode, _var)				\
-	{ _SYSDEV_ATTR(_name, _mode, sysdev_show_int, sysdev_store_int), \
-	  &(_var) }
-#define SYSDEV_INT_ATTR(_name, _mode, _var)			\
-	struct sysdev_ext_attribute attr_##_name = 		\
-		_SYSDEV_INT_ATTR(_name, _mode, _var);
-
-#endif /* _SYSDEV_H_ */
-- 
cgit v1.2.3


From 2d58d7ea9164da59d0ea82fdf80e3ababe52d58c Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 4 Nov 2011 10:31:04 +0100
Subject: thermal: Rename generate_netlink_event

It doesn't seem right for the thermal subsystem to export a symbol
named generate_netlink_event. This function is thermal-specific and
its name should reflect that fact. Rename it to
thermal_generate_netlink_event.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: R.Durgadoss <durgadoss.r@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 Documentation/thermal/sysfs-api.txt | 2 +-
 drivers/thermal/thermal_sys.c       | 4 ++--
 include/linux/thermal.h             | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index b61e46f449aa..1733ab947a95 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -284,7 +284,7 @@ method, the sys I/F structure will be built like this:
 The framework includes a simple notification mechanism, in the form of a
 netlink event. Netlink socket initialization is done during the _init_
 of the framework. Drivers which intend to use the notification mechanism
-just need to call generate_netlink_event() with two arguments viz
+just need to call thermal_generate_netlink_event() with two arguments viz
 (originator, event). Typically the originator will be an integer assigned
 to a thermal_zone_device when it registers itself with the framework. The
 event will be one of:{THERMAL_AUX0, THERMAL_AUX1, THERMAL_CRITICAL,
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index dd9a5743fa99..220ce7e31cf5 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -1304,7 +1304,7 @@ static struct genl_multicast_group thermal_event_mcgrp = {
 	.name = THERMAL_GENL_MCAST_GROUP_NAME,
 };
 
-int generate_netlink_event(u32 orig, enum events event)
+int thermal_generate_netlink_event(u32 orig, enum events event)
 {
 	struct sk_buff *skb;
 	struct nlattr *attr;
@@ -1363,7 +1363,7 @@ int generate_netlink_event(u32 orig, enum events event)
 
 	return result;
 }
-EXPORT_SYMBOL(generate_netlink_event);
+EXPORT_SYMBOL(thermal_generate_netlink_event);
 
 static int genetlink_init(void)
 {
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 47b4a27e6e97..796f1ff0388c 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -152,9 +152,9 @@ struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
 void thermal_cooling_device_unregister(struct thermal_cooling_device *);
 
 #ifdef CONFIG_NET
-extern int generate_netlink_event(u32 orig, enum events event);
+extern int thermal_generate_netlink_event(u32 orig, enum events event);
 #else
-static inline int generate_netlink_event(u32 orig, enum events event)
+static inline int thermal_generate_netlink_event(u32 orig, enum events event)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 6536e3123e5d3371a6f52e32a3d0694bcc987702 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 20 Jan 2012 14:33:53 -0800
Subject: mm: fix warnings regarding enum migrate_mode

sparc64 allmodconfig:

In file included from include/linux/compat.h:15,
                 from /usr/src/25/arch/sparc/include/asm/siginfo.h:19,
                 from include/linux/signal.h:5,
                 from include/linux/sched.h:73,
                 from arch/sparc/kernel/asm-offsets.c:13:
include/linux/fs.h:618: warning: parameter has incomplete type

It seems that my sparc64 compiler (gcc-3.4.5) doesn't like the forward
declaration of enums.

Fix this by moving the "enum migrate_mode" definition into its own header
file.

Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andy Isaacson <adi@hexapodia.org>
Cc: Nai Xia <nai.xia@gmail.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h           |  2 +-
 include/linux/migrate.h      | 14 +-------------
 include/linux/migrate_mode.h | 16 ++++++++++++++++
 3 files changed, 18 insertions(+), 14 deletions(-)
 create mode 100644 include/linux/migrate_mode.h

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0244082d42c5..4b3a41fe22bf 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -10,6 +10,7 @@
 #include <linux/ioctl.h>
 #include <linux/blk_types.h>
 #include <linux/types.h>
+#include <linux/migrate_mode.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -526,7 +527,6 @@ enum positive_aop_returns {
 struct page;
 struct address_space;
 struct writeback_control;
-enum migrate_mode;
 
 struct iov_iter {
 	const struct iovec *iov;
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index eaf867412f7a..05ed2828a553 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -3,22 +3,10 @@
 
 #include <linux/mm.h>
 #include <linux/mempolicy.h>
+#include <linux/migrate_mode.h>
 
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
-/*
- * MIGRATE_ASYNC means never block
- * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking
- *	on most operations but not ->writepage as the potential stall time
- *	is too significant
- * MIGRATE_SYNC will block when migrating pages
- */
-enum migrate_mode {
-	MIGRATE_ASYNC,
-	MIGRATE_SYNC_LIGHT,
-	MIGRATE_SYNC,
-};
-
 #ifdef CONFIG_MIGRATION
 #define PAGE_MIGRATION 1
 
diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
new file mode 100644
index 000000000000..ebf3d89a3919
--- /dev/null
+++ b/include/linux/migrate_mode.h
@@ -0,0 +1,16 @@
+#ifndef MIGRATE_MODE_H_INCLUDED
+#define MIGRATE_MODE_H_INCLUDED
+/*
+ * MIGRATE_ASYNC means never block
+ * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking
+ *	on most operations but not ->writepage as the potential stall time
+ *	is too significant
+ * MIGRATE_SYNC will block when migrating pages
+ */
+enum migrate_mode {
+	MIGRATE_ASYNC,
+	MIGRATE_SYNC_LIGHT,
+	MIGRATE_SYNC,
+};
+
+#endif		/* MIGRATE_MODE_H_INCLUDED */
-- 
cgit v1.2.3


From cb78edfdcef5259ac9e9088bd63810d21299928d Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Fri, 20 Jan 2012 14:34:16 -0800
Subject: kdump: define KEXEC_NOTE_BYTES arch specific for s390x

kdump only allocates memory for the prstatus ELF note.  For s390x,
besides prstatus, multiple ELF notes for various different register
types are stored.  Therefore the currently allocated memory is not
sufficient.  With this patch the KEXEC_NOTE_BYTES macro can be defined
by architecture code and for s390x it is set to the correct size now.

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/include/asm/kexec.h | 18 ++++++++++++++++++
 include/linux/kexec.h         |  2 ++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index cf4e47b0948c..3f30dac804ea 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -42,6 +42,24 @@
 /* The native architecture */
 #define KEXEC_ARCH KEXEC_ARCH_S390
 
+/*
+ * Size for s390x ELF notes per CPU
+ *
+ * Seven notes plus zero note at the end: prstatus, fpregset, timer,
+ * tod_cmp, tod_reg, control regs, and prefix
+ */
+#define KEXEC_NOTE_BYTES \
+	(ALIGN(sizeof(struct elf_note), 4) * 8 + \
+	 ALIGN(sizeof("CORE"), 4) * 7 + \
+	 ALIGN(sizeof(struct elf_prstatus), 4) + \
+	 ALIGN(sizeof(elf_fpregset_t), 4) + \
+	 ALIGN(sizeof(u64), 4) + \
+	 ALIGN(sizeof(u64), 4) + \
+	 ALIGN(sizeof(u32), 4) + \
+	 ALIGN(sizeof(u64) * 16, 4) + \
+	 ALIGN(sizeof(u32), 4) \
+	)
+
 /* Provide a dummy definition to avoid build failures. */
 static inline void crash_setup_regs(struct pt_regs *newregs,
 					struct pt_regs *oldregs) { }
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 2fa0901219d4..0d7d6a1b172f 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -50,9 +50,11 @@
  * note header.  For kdump, the code in vmcore.c runs in the context
  * of the second kernel to combine them into one note.
  */
+#ifndef KEXEC_NOTE_BYTES
 #define KEXEC_NOTE_BYTES ( (KEXEC_NOTE_HEAD_BYTES * 2) +		\
 			    KEXEC_CORE_NOTE_NAME_BYTES +		\
 			    KEXEC_CORE_NOTE_DESC_BYTES )
+#endif
 
 /*
  * This structure is used to hold the arguments that are used when loading
-- 
cgit v1.2.3


From 245132643e1cfcd145bbc86a716c1818371fcb93 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 20 Jan 2012 14:34:21 -0800
Subject: SHM_UNLOCK: fix Unevictable pages stranded after swap

Commit cc39c6a9bbde ("mm: account skipped entries to avoid looping in
find_get_pages") correctly fixed an infinite loop; but left a problem
that find_get_pages() on shmem would return 0 (appearing to callers to
mean end of tree) when it meets a run of nr_pages swap entries.

The only uses of find_get_pages() on shmem are via pagevec_lookup(),
called from invalidate_mapping_pages(), and from shmctl SHM_UNLOCK's
scan_mapping_unevictable_pages().  The first is already commented, and
not worth worrying about; but the second can leave pages on the
Unevictable list after an unusual sequence of swapping and locking.

Fix that by using shmem_find_get_pages_and_swap() (then ignoring the
swap) instead of pagevec_lookup().

But I don't want to contaminate vmscan.c with shmem internals, nor
shmem.c with LRU locking.  So move scan_mapping_unevictable_pages() into
shmem.c, renaming it shmem_unlock_mapping(); and rename
check_move_unevictable_page() to check_move_unevictable_pages(), looping
down an array of pages, oftentimes under the same lock.

Leave out the "rotate unevictable list" block: that's a leftover from
when this was used for /proc/sys/vm/scan_unevictable_pages, whose flawed
handling involved looking at pages at tail of LRU.

Was there significance to the sequence first ClearPageUnevictable, then
test page_evictable, then SetPageUnevictable here? I think not, we're
under LRU lock, and have no barriers between those.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michel Lespinasse <walken@google.com>
Cc: <stable@vger.kernel.org> [back to 3.1 but will need respins]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shmem_fs.h |   1 +
 include/linux/swap.h     |   2 +-
 ipc/shm.c                |   2 +-
 mm/shmem.c               |  46 +++++++++++++++--
 mm/vmscan.c              | 128 +++++++++++++++--------------------------------
 5 files changed, 83 insertions(+), 96 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index e4c711c6f321..79ab2555b3b0 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -48,6 +48,7 @@ extern struct file *shmem_file_setup(const char *name,
 					loff_t size, unsigned long flags);
 extern int shmem_zero_setup(struct vm_area_struct *);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern void shmem_unlock_mapping(struct address_space *mapping);
 extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 					pgoff_t index, gfp_t gfp_mask);
 extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 06061a7f8e69..3e60228e7299 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -273,7 +273,7 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 #endif
 
 extern int page_evictable(struct page *page, struct vm_area_struct *vma);
-extern void scan_mapping_unevictable_pages(struct address_space *);
+extern void check_move_unevictable_pages(struct page **, int nr_pages);
 
 extern unsigned long scan_unevictable_pages;
 extern int scan_unevictable_handler(struct ctl_table *, int,
diff --git a/ipc/shm.c b/ipc/shm.c
index 854ab58e5f6e..b76be5bda6c2 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -916,7 +916,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 		shp->mlock_user = NULL;
 		get_file(shm_file);
 		shm_unlock(shp);
-		scan_mapping_unevictable_pages(shm_file->f_mapping);
+		shmem_unlock_mapping(shm_file->f_mapping);
 		fput(shm_file);
 		goto out;
 	}
diff --git a/mm/shmem.c b/mm/shmem.c
index 4aaa53abe302..269d049294ab 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -379,7 +379,7 @@ static int shmem_free_swap(struct address_space *mapping,
 /*
  * Pagevec may contain swap entries, so shuffle up pages before releasing.
  */
-static void shmem_pagevec_release(struct pagevec *pvec)
+static void shmem_deswap_pagevec(struct pagevec *pvec)
 {
 	int i, j;
 
@@ -389,7 +389,36 @@ static void shmem_pagevec_release(struct pagevec *pvec)
 			pvec->pages[j++] = page;
 	}
 	pvec->nr = j;
-	pagevec_release(pvec);
+}
+
+/*
+ * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
+ */
+void shmem_unlock_mapping(struct address_space *mapping)
+{
+	struct pagevec pvec;
+	pgoff_t indices[PAGEVEC_SIZE];
+	pgoff_t index = 0;
+
+	pagevec_init(&pvec, 0);
+	/*
+	 * Minor point, but we might as well stop if someone else SHM_LOCKs it.
+	 */
+	while (!mapping_unevictable(mapping)) {
+		/*
+		 * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
+		 * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
+		 */
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+					PAGEVEC_SIZE, pvec.pages, indices);
+		if (!pvec.nr)
+			break;
+		index = indices[pvec.nr - 1] + 1;
+		shmem_deswap_pagevec(&pvec);
+		check_move_unevictable_pages(pvec.pages, pvec.nr);
+		pagevec_release(&pvec);
+		cond_resched();
+	}
 }
 
 /*
@@ -440,7 +469,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			}
 			unlock_page(page);
 		}
-		shmem_pagevec_release(&pvec);
+		shmem_deswap_pagevec(&pvec);
+		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
@@ -470,7 +500,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			continue;
 		}
 		if (index == start && indices[0] > end) {
-			shmem_pagevec_release(&pvec);
+			shmem_deswap_pagevec(&pvec);
+			pagevec_release(&pvec);
 			break;
 		}
 		mem_cgroup_uncharge_start();
@@ -494,7 +525,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			}
 			unlock_page(page);
 		}
-		shmem_pagevec_release(&pvec);
+		shmem_deswap_pagevec(&pvec);
+		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		index++;
 	}
@@ -2438,6 +2470,10 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
 	return 0;
 }
 
+void shmem_unlock_mapping(struct address_space *mapping)
+{
+}
+
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
 	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e097c1026b58..c52b23552659 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -26,7 +26,6 @@
 #include <linux/buffer_head.h>	/* for try_to_release_page(),
 					buffer_heads_over_limit */
 #include <linux/mm_inline.h>
-#include <linux/pagevec.h>
 #include <linux/backing-dev.h>
 #include <linux/rmap.h>
 #include <linux/topology.h>
@@ -661,7 +660,7 @@ redo:
 		 * When racing with an mlock or AS_UNEVICTABLE clearing
 		 * (page is unlocked) make sure that if the other thread
 		 * does not observe our setting of PG_lru and fails
-		 * isolation/check_move_unevictable_page,
+		 * isolation/check_move_unevictable_pages,
 		 * we see PG_mlocked/AS_UNEVICTABLE cleared below and move
 		 * the page back to the evictable list.
 		 *
@@ -3501,107 +3500,58 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
 
 #ifdef CONFIG_SHMEM
 /**
- * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
- * @page: page to check evictability and move to appropriate lru list
- * @zone: zone page is in
+ * check_move_unevictable_pages - check pages for evictability and move to appropriate zone lru list
+ * @pages:	array of pages to check
+ * @nr_pages:	number of pages to check
  *
- * Checks a page for evictability and moves the page to the appropriate
- * zone lru list.
- *
- * Restrictions: zone->lru_lock must be held, page must be on LRU and must
- * have PageUnevictable set.
+ * Checks pages for evictability and moves them to the appropriate lru list.
  *
  * This function is only used for SysV IPC SHM_UNLOCK.
  */
-static void check_move_unevictable_page(struct page *page, struct zone *zone)
+void check_move_unevictable_pages(struct page **pages, int nr_pages)
 {
 	struct lruvec *lruvec;
+	struct zone *zone = NULL;
+	int pgscanned = 0;
+	int pgrescued = 0;
+	int i;
 
-	VM_BUG_ON(PageActive(page));
-retry:
-	ClearPageUnevictable(page);
-	if (page_evictable(page, NULL)) {
-		enum lru_list l = page_lru_base_type(page);
-
-		__dec_zone_state(zone, NR_UNEVICTABLE);
-		lruvec = mem_cgroup_lru_move_lists(zone, page,
-						   LRU_UNEVICTABLE, l);
-		list_move(&page->lru, &lruvec->lists[l]);
-		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
-		__count_vm_event(UNEVICTABLE_PGRESCUED);
-	} else {
-		/*
-		 * rotate unevictable list
-		 */
-		SetPageUnevictable(page);
-		lruvec = mem_cgroup_lru_move_lists(zone, page, LRU_UNEVICTABLE,
-						   LRU_UNEVICTABLE);
-		list_move(&page->lru, &lruvec->lists[LRU_UNEVICTABLE]);
-		if (page_evictable(page, NULL))
-			goto retry;
-	}
-}
-
-/**
- * scan_mapping_unevictable_pages - scan an address space for evictable pages
- * @mapping: struct address_space to scan for evictable pages
- *
- * Scan all pages in mapping.  Check unevictable pages for
- * evictability and move them to the appropriate zone lru list.
- *
- * This function is only used for SysV IPC SHM_UNLOCK.
- */
-void scan_mapping_unevictable_pages(struct address_space *mapping)
-{
-	pgoff_t next = 0;
-	pgoff_t end   = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
-			 PAGE_CACHE_SHIFT;
-	struct zone *zone;
-	struct pagevec pvec;
-
-	if (mapping->nrpages == 0)
-		return;
-
-	pagevec_init(&pvec, 0);
-	while (next < end &&
-		pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
-		int i;
-		int pg_scanned = 0;
-
-		zone = NULL;
-
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
-			pgoff_t page_index = page->index;
-			struct zone *pagezone = page_zone(page);
+	for (i = 0; i < nr_pages; i++) {
+		struct page *page = pages[i];
+		struct zone *pagezone;
 
-			pg_scanned++;
-			if (page_index > next)
-				next = page_index;
-			next++;
+		pgscanned++;
+		pagezone = page_zone(page);
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock_irq(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock_irq(&zone->lru_lock);
+		}
 
-			if (pagezone != zone) {
-				if (zone)
-					spin_unlock_irq(&zone->lru_lock);
-				zone = pagezone;
-				spin_lock_irq(&zone->lru_lock);
-			}
+		if (!PageLRU(page) || !PageUnevictable(page))
+			continue;
 
-			if (PageLRU(page) && PageUnevictable(page))
-				check_move_unevictable_page(page, zone);
+		if (page_evictable(page, NULL)) {
+			enum lru_list lru = page_lru_base_type(page);
+
+			VM_BUG_ON(PageActive(page));
+			ClearPageUnevictable(page);
+			__dec_zone_state(zone, NR_UNEVICTABLE);
+			lruvec = mem_cgroup_lru_move_lists(zone, page,
+						LRU_UNEVICTABLE, lru);
+			list_move(&page->lru, &lruvec->lists[lru]);
+			__inc_zone_state(zone, NR_INACTIVE_ANON + lru);
+			pgrescued++;
 		}
-		if (zone)
-			spin_unlock_irq(&zone->lru_lock);
-		pagevec_release(&pvec);
+	}
 
-		count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
-		cond_resched();
+	if (zone) {
+		__count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
+		__count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
+		spin_unlock_irq(&zone->lru_lock);
 	}
 }
-#else
-void scan_mapping_unevictable_pages(struct address_space *mapping)
-{
-}
 #endif /* CONFIG_SHMEM */
 
 static void warn_scan_unevictable_pages(void)
-- 
cgit v1.2.3


From 2eda013f4894bc200124f791a56c4defb613a0cc Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:02:51 -0800
Subject: kernel-doc: fix new warnings in device.h

Fix new kernel-doc warnings:

Warning(include/linux/device.h:299): No description found for parameter 'name'
Warning(include/linux/device.h:299): No description found for parameter 'subsys'
Warning(include/linux/device.h:299): No description found for parameter 'node'
Warning(include/linux/device.h:299): No description found for parameter 'add_dev'
Warning(include/linux/device.h:299): No description found for parameter 'remove_dev'
Warning(include/linux/device.h:685): No description found for parameter 'id'
Warning(include/linux/device.h:1009): No description found for parameter '__driver'
Warning(include/linux/device.h:1009): No description found for parameter '__register'
Warning(include/linux/device.h:1009): No description found for parameter '__unregister'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/device.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 5b3adb8f9588..b63fb393aa58 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -279,11 +279,11 @@ struct device *driver_find_device(struct device_driver *drv,
 
 /**
  * struct subsys_interface - interfaces to device functions
- * @name        name of the device function
- * @subsystem   subsytem of the devices to attach to
- * @node        the list of functions registered at the subsystem
- * @add         device hookup to device function handler
- * @remove      device hookup to device function handler
+ * @name:       name of the device function
+ * @subsys:     subsytem of the devices to attach to
+ * @node:       the list of functions registered at the subsystem
+ * @add_dev:    device hookup to device function handler
+ * @remove_dev: device hookup to device function handler
  *
  * Simple interfaces attached to a subsystem. Multiple interfaces can
  * attach to a subsystem and its devices. Unlike drivers, they do not
@@ -612,6 +612,7 @@ struct device_dma_parameters {
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
  * @devt:	For creating the sysfs "dev".
+ * @id:		device instance
  * @devres_lock: Spinlock to protect the resource of the device.
  * @devres_head: The resources list of the device.
  * @knode_class: The node used to add the device to the class list.
@@ -1003,6 +1004,10 @@ extern long sysfs_deprecated;
  * Each module may only use this macro once, and calling it replaces
  * module_init() and module_exit().
  *
+ * @__driver: driver name
+ * @__register: register function for this driver type
+ * @__unregister: unregister function for this driver type
+ *
  * Use this macro to construct bus specific macros for registering
  * drivers, and do not use it on its own.
  */
-- 
cgit v1.2.3


From 4d922612df8bd1202a1f51d95b78aca3d67302cd Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:02:56 -0800
Subject: kernel-doc: fix new warning in usb.h

Fix new kernel-doc warning:

Warning(include/linux/usb.h:1251): No description found for parameter 'num_mapped_sgs'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/usb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 27a4e16d2bf1..69d845739bc2 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1073,6 +1073,7 @@ typedef void (*usb_complete_t)(struct urb *);
  *	which the host controller driver should use in preference to the
  *	transfer_buffer.
  * @sg: scatter gather buffer list
+ * @num_mapped_sgs: (internal) number of mapped sg entries
  * @num_sgs: number of entries in the sg list
  * @transfer_buffer_length: How big is transfer_buffer.  The transfer may
  *	be broken up into chunks according to the current maximum packet
-- 
cgit v1.2.3


From fa757281a08799fd6c0f7ec6f111d1cd66afc97b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:03:13 -0800
Subject: kernel-doc: fix kernel-doc warnings in sched

Fix new kernel-doc notation warnings:

Warning(include/linux/sched.h:2094): No description found for parameter 'p'
Warning(include/linux/sched.h:2094): Excess function parameter 'tsk' description in 'is_idle_task'
Warning(kernel/sched/cpupri.c:139): No description found for parameter 'newpri'
Warning(kernel/sched/cpupri.c:139): Excess function parameter 'pri' description in 'cpupri_set'
Warning(kernel/sched/cpupri.c:208): Excess function parameter 'bootmem' description in 'cpupri_init'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc:	Ingo Molnar <mingo@elte.hu>
Cc:	Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 2 +-
 kernel/sched/cpupri.c | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4032ec1cf836..513f52459872 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2088,7 +2088,7 @@ extern int sched_setscheduler_nocheck(struct task_struct *, int,
 extern struct task_struct *idle_task(int cpu);
 /**
  * is_idle_task - is the specified task an idle task?
- * @tsk: the task in question.
+ * @p: the task in question.
  */
 static inline bool is_idle_task(struct task_struct *p)
 {
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index b0d798eaf130..d72586fdf660 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -129,7 +129,7 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
  * cpupri_set - update the cpu priority setting
  * @cp: The cpupri context
  * @cpu: The target cpu
- * @pri: The priority (INVALID-RT99) to assign to this CPU
+ * @newpri: The priority (INVALID-RT99) to assign to this CPU
  *
  * Note: Assumes cpu_rq(cpu)->lock is locked
  *
@@ -200,7 +200,6 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 /**
  * cpupri_init - initialize the cpupri structure
  * @cp: The cpupri context
- * @bootmem: true if allocations need to use bootmem
  *
  * Returns: -ENOMEM if memory fails.
  */
-- 
cgit v1.2.3


From c1aab02dac690af7ff634d8e1cb3be6a04387eef Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 24 Jan 2012 11:41:32 +1100
Subject: migrate_mode.h is not exported to user mode

migrate_mode.h is not exported to user mode, so move its include in fs.h
inside the __KERNEL__ protection.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4b3a41fe22bf..386da09f229d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -10,7 +10,6 @@
 #include <linux/ioctl.h>
 #include <linux/blk_types.h>
 #include <linux/types.h>
-#include <linux/migrate_mode.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -397,6 +396,7 @@ struct inodes_stat_t {
 #include <linux/rculist_bl.h>
 #include <linux/atomic.h>
 #include <linux/shrinker.h>
+#include <linux/migrate_mode.h>
 
 #include <asm/byteorder.h>
 
-- 
cgit v1.2.3


From c1084a56da255ef5385c0f587e16fdc225a5460f Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Wed, 21 Dec 2011 10:19:38 +0200
Subject: usb: otg: kill langwell_otg driver

As added by f0ae849 (usb: Add Intel Langwell USB OTG Transceiver
Driver), this driver never even compiled together with langwell_udc,
and that's the only way for it to be useful.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: stable@vger.kernel.org # v2.6.31+
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Alan Cox <alan@linux.intel.com>
Cc: linux-usb@vger.kernel.org
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/otg/Kconfig          |   14 -
 drivers/usb/otg/Makefile         |    1 -
 drivers/usb/otg/langwell_otg.c   | 2347 --------------------------------------
 include/linux/usb/langwell_otg.h |  139 ---
 4 files changed, 2501 deletions(-)
 delete mode 100644 drivers/usb/otg/langwell_otg.c
 delete mode 100644 include/linux/usb/langwell_otg.h

(limited to 'include/linux')

diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig
index 2a25955881fc..9105c285f594 100644
--- a/drivers/usb/otg/Kconfig
+++ b/drivers/usb/otg/Kconfig
@@ -86,20 +86,6 @@ config NOP_USB_XCEIV
 	  built-in with usb ip or which are autonomous and doesn't require any
 	  phy programming such as ISP1x04 etc.
 
-config USB_LANGWELL_OTG
-	tristate "Intel Langwell USB OTG dual-role support"
-	depends on USB && PCI && INTEL_SCU_IPC
-	select USB_OTG
-	select USB_OTG_UTILS
-	help
-	  Say Y here if you want to build Intel Langwell USB OTG
-	  transciever driver in kernel. This driver implements role
-	  switch between EHCI host driver and Langwell USB OTG
-	  client driver.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called langwell_otg.
-
 config USB_MSM_OTG
 	tristate "OTG support for Qualcomm on-chip USB controller"
 	depends on (USB || USB_GADGET) && ARCH_MSM
diff --git a/drivers/usb/otg/Makefile b/drivers/usb/otg/Makefile
index b2c5a9598637..41aa5098b139 100644
--- a/drivers/usb/otg/Makefile
+++ b/drivers/usb/otg/Makefile
@@ -13,7 +13,6 @@ obj-$(CONFIG_USB_GPIO_VBUS)	+= gpio_vbus.o
 obj-$(CONFIG_ISP1301_OMAP)	+= isp1301_omap.o
 obj-$(CONFIG_TWL4030_USB)	+= twl4030-usb.o
 obj-$(CONFIG_TWL6030_USB)	+= twl6030-usb.o
-obj-$(CONFIG_USB_LANGWELL_OTG)	+= langwell_otg.o
 obj-$(CONFIG_NOP_USB_XCEIV)	+= nop-usb-xceiv.o
 obj-$(CONFIG_USB_ULPI)		+= ulpi.o
 obj-$(CONFIG_USB_ULPI_VIEWPORT)	+= ulpi_viewport.o
diff --git a/drivers/usb/otg/langwell_otg.c b/drivers/usb/otg/langwell_otg.c
deleted file mode 100644
index f08f784086f7..000000000000
--- a/drivers/usb/otg/langwell_otg.c
+++ /dev/null
@@ -1,2347 +0,0 @@
-/*
- * Intel Langwell USB OTG transceiver driver
- * Copyright (C) 2008 - 2010, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- */
-/* This driver helps to switch Langwell OTG controller function between host
- * and peripheral. It works with EHCI driver and Langwell client controller
- * driver together.
- */
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/moduleparam.h>
-#include <linux/usb/ch9.h>
-#include <linux/usb/gadget.h>
-#include <linux/usb.h>
-#include <linux/usb/otg.h>
-#include <linux/usb/hcd.h>
-#include <linux/notifier.h>
-#include <linux/delay.h>
-#include <asm/intel_scu_ipc.h>
-
-#include <linux/usb/langwell_otg.h>
-
-#define	DRIVER_DESC		"Intel Langwell USB OTG transceiver driver"
-#define	DRIVER_VERSION		"July 10, 2010"
-
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_AUTHOR("Henry Yuan <hang.yuan@intel.com>, Hao Wu <hao.wu@intel.com>");
-MODULE_VERSION(DRIVER_VERSION);
-MODULE_LICENSE("GPL");
-
-static const char driver_name[] = "langwell_otg";
-
-static int langwell_otg_probe(struct pci_dev *pdev,
-			const struct pci_device_id *id);
-static void langwell_otg_remove(struct pci_dev *pdev);
-static int langwell_otg_suspend(struct pci_dev *pdev, pm_message_t message);
-static int langwell_otg_resume(struct pci_dev *pdev);
-
-static int langwell_otg_set_host(struct otg_transceiver *otg,
-				struct usb_bus *host);
-static int langwell_otg_set_peripheral(struct otg_transceiver *otg,
-				struct usb_gadget *gadget);
-static int langwell_otg_start_srp(struct otg_transceiver *otg);
-
-static const struct pci_device_id pci_ids[] = {{
-	.class =        ((PCI_CLASS_SERIAL_USB << 8) | 0xfe),
-	.class_mask =   ~0,
-	.vendor =	0x8086,
-	.device =	0x0811,
-	.subvendor =	PCI_ANY_ID,
-	.subdevice =	PCI_ANY_ID,
-}, { /* end: all zeroes */ }
-};
-
-static struct pci_driver otg_pci_driver = {
-	.name =		(char *) driver_name,
-	.id_table =	pci_ids,
-
-	.probe =	langwell_otg_probe,
-	.remove =	langwell_otg_remove,
-
-	.suspend =	langwell_otg_suspend,
-	.resume =	langwell_otg_resume,
-};
-
-/* HSM timers */
-static inline struct langwell_otg_timer *otg_timer_initializer
-(void (*function)(unsigned long), unsigned long expires, unsigned long data)
-{
-	struct langwell_otg_timer *timer;
-	timer = kmalloc(sizeof(struct langwell_otg_timer), GFP_KERNEL);
-	if (timer == NULL)
-		return timer;
-
-	timer->function = function;
-	timer->expires = expires;
-	timer->data = data;
-	return timer;
-}
-
-static struct langwell_otg_timer *a_wait_vrise_tmr, *a_aidl_bdis_tmr,
-	*b_se0_srp_tmr, *b_srp_init_tmr;
-
-static struct list_head active_timers;
-
-static struct langwell_otg *the_transceiver;
-
-/* host/client notify transceiver when event affects HNP state */
-void langwell_update_transceiver(void)
-{
-	struct langwell_otg *lnw = the_transceiver;
-
-	dev_dbg(lnw->dev, "transceiver is updated\n");
-
-	if (!lnw->qwork)
-		return ;
-
-	queue_work(lnw->qwork, &lnw->work);
-}
-EXPORT_SYMBOL(langwell_update_transceiver);
-
-static int langwell_otg_set_host(struct otg_transceiver *otg,
-					struct usb_bus *host)
-{
-	otg->host = host;
-
-	return 0;
-}
-
-static int langwell_otg_set_peripheral(struct otg_transceiver *otg,
-					struct usb_gadget *gadget)
-{
-	otg->gadget = gadget;
-
-	return 0;
-}
-
-static int langwell_otg_set_power(struct otg_transceiver *otg,
-				unsigned mA)
-{
-	return 0;
-}
-
-/* A-device drives vbus, controlled through IPC commands */
-static int langwell_otg_set_vbus(struct otg_transceiver *otg, bool enabled)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	u8				sub_id;
-
-	dev_dbg(lnw->dev, "%s <--- %s\n", __func__, enabled ? "on" : "off");
-
-	if (enabled)
-		sub_id = 0x8; /* Turn on the VBus */
-	else
-		sub_id = 0x9; /* Turn off the VBus */
-
-	if (intel_scu_ipc_simple_command(0xef, sub_id)) {
-		dev_dbg(lnw->dev, "Failed to set Vbus via IPC commands\n");
-		return -EBUSY;
-	}
-
-	dev_dbg(lnw->dev, "%s --->\n", __func__);
-
-	return 0;
-}
-
-/* charge vbus or discharge vbus through a resistor to ground */
-static void langwell_otg_chrg_vbus(int on)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	u32	val;
-
-	val = readl(lnw->iotg.base + CI_OTGSC);
-
-	if (on)
-		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_VC,
-				lnw->iotg.base + CI_OTGSC);
-	else
-		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_VD,
-				lnw->iotg.base + CI_OTGSC);
-}
-
-/* Start SRP */
-static int langwell_otg_start_srp(struct otg_transceiver *otg)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val;
-
-	dev_dbg(lnw->dev, "%s --->\n", __func__);
-
-	val = readl(iotg->base + CI_OTGSC);
-
-	writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HADP,
-				iotg->base + CI_OTGSC);
-
-	/* Check if the data plus is finished or not */
-	msleep(8);
-	val = readl(iotg->base + CI_OTGSC);
-	if (val & (OTGSC_HADP | OTGSC_DP))
-		dev_dbg(lnw->dev, "DataLine SRP Error\n");
-
-	/* Disable interrupt - b_sess_vld */
-	val = readl(iotg->base + CI_OTGSC);
-	val &= (~(OTGSC_BSVIE | OTGSC_BSEIE));
-	writel(val, iotg->base + CI_OTGSC);
-
-	/* Start VBus SRP, drive vbus to generate VBus pulse */
-	iotg->otg.set_vbus(&iotg->otg, true);
-	msleep(15);
-	iotg->otg.set_vbus(&iotg->otg, false);
-
-	/* Enable interrupt - b_sess_vld*/
-	val = readl(iotg->base + CI_OTGSC);
-	dev_dbg(lnw->dev, "after VBUS pulse otgsc = %x\n", val);
-
-	val |= (OTGSC_BSVIE | OTGSC_BSEIE);
-	writel(val, iotg->base + CI_OTGSC);
-
-	/* If Vbus is valid, then update the hsm */
-	if (val & OTGSC_BSV) {
-		dev_dbg(lnw->dev, "no b_sess_vld interrupt\n");
-
-		lnw->iotg.hsm.b_sess_vld = 1;
-		langwell_update_transceiver();
-	}
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-	return 0;
-}
-
-/* stop SOF via bus_suspend */
-static void langwell_otg_loc_sof(int on)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	struct usb_hcd		*hcd;
-	int			err;
-
-	dev_dbg(lnw->dev, "%s ---> %s\n", __func__, on ? "suspend" : "resume");
-
-	hcd = bus_to_hcd(lnw->iotg.otg.host);
-	if (on)
-		err = hcd->driver->bus_resume(hcd);
-	else
-		err = hcd->driver->bus_suspend(hcd);
-
-	if (err)
-		dev_dbg(lnw->dev, "Fail to resume/suspend USB bus - %d\n", err);
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-}
-
-static int langwell_otg_check_otgsc(void)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	u32				otgsc, usbcfg;
-
-	dev_dbg(lnw->dev, "check sync OTGSC and USBCFG registers\n");
-
-	otgsc = readl(lnw->iotg.base + CI_OTGSC);
-	usbcfg = readl(lnw->usbcfg);
-
-	dev_dbg(lnw->dev, "OTGSC = %08x, USBCFG = %08x\n",
-					otgsc, usbcfg);
-	dev_dbg(lnw->dev, "OTGSC_AVV = %d\n", !!(otgsc & OTGSC_AVV));
-	dev_dbg(lnw->dev, "USBCFG.VBUSVAL = %d\n",
-					!!(usbcfg & USBCFG_VBUSVAL));
-	dev_dbg(lnw->dev, "OTGSC_ASV = %d\n", !!(otgsc & OTGSC_ASV));
-	dev_dbg(lnw->dev, "USBCFG.AVALID = %d\n",
-					!!(usbcfg & USBCFG_AVALID));
-	dev_dbg(lnw->dev, "OTGSC_BSV = %d\n", !!(otgsc & OTGSC_BSV));
-	dev_dbg(lnw->dev, "USBCFG.BVALID = %d\n",
-					!!(usbcfg & USBCFG_BVALID));
-	dev_dbg(lnw->dev, "OTGSC_BSE = %d\n", !!(otgsc & OTGSC_BSE));
-	dev_dbg(lnw->dev, "USBCFG.SESEND = %d\n",
-					!!(usbcfg & USBCFG_SESEND));
-
-	/* Check USBCFG VBusValid/AValid/BValid/SessEnd */
-	if (!!(otgsc & OTGSC_AVV) ^ !!(usbcfg & USBCFG_VBUSVAL)) {
-		dev_dbg(lnw->dev, "OTGSC.AVV != USBCFG.VBUSVAL\n");
-		goto err;
-	}
-	if (!!(otgsc & OTGSC_ASV) ^ !!(usbcfg & USBCFG_AVALID)) {
-		dev_dbg(lnw->dev, "OTGSC.ASV != USBCFG.AVALID\n");
-		goto err;
-	}
-	if (!!(otgsc & OTGSC_BSV) ^ !!(usbcfg & USBCFG_BVALID)) {
-		dev_dbg(lnw->dev, "OTGSC.BSV != USBCFG.BVALID\n");
-		goto err;
-	}
-	if (!!(otgsc & OTGSC_BSE) ^ !!(usbcfg & USBCFG_SESEND)) {
-		dev_dbg(lnw->dev, "OTGSC.BSE != USBCFG.SESSEN\n");
-		goto err;
-	}
-
-	dev_dbg(lnw->dev, "OTGSC and USBCFG are synced\n");
-
-	return 0;
-
-err:
-	dev_warn(lnw->dev, "OTGSC isn't equal to USBCFG\n");
-	return -EPIPE;
-}
-
-
-static void langwell_otg_phy_low_power(int on)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u8				val, phcd;
-	int				retval;
-
-	dev_dbg(lnw->dev, "%s ---> %s mode\n",
-			__func__, on ? "Low power" : "Normal");
-
-	phcd = 0x40;
-
-	val = readb(iotg->base + CI_HOSTPC1 + 2);
-
-	if (on) {
-		/* Due to hardware issue, after set PHCD, sync will failed
-		 * between USBCFG and OTGSC, so before set PHCD, check if
-		 * sync is in process now. If the answer is "yes", then do
-		 * not touch PHCD bit */
-		retval = langwell_otg_check_otgsc();
-		if (retval) {
-			dev_dbg(lnw->dev, "Skip PHCD programming..\n");
-			return ;
-		}
-
-		writeb(val | phcd, iotg->base + CI_HOSTPC1 + 2);
-	} else
-		writeb(val & ~phcd, iotg->base + CI_HOSTPC1 + 2);
-
-	dev_dbg(lnw->dev, "%s <--- done\n", __func__);
-}
-
-/* After drv vbus, add 5 ms delay to set PHCD */
-static void langwell_otg_phy_low_power_wait(int on)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-
-	dev_dbg(lnw->dev, "add 5ms delay before programing PHCD\n");
-
-	mdelay(5);
-	langwell_otg_phy_low_power(on);
-}
-
-/* Enable/Disable OTG interrupt */
-static void langwell_otg_intr(int on)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val;
-
-	dev_dbg(lnw->dev, "%s ---> %s\n", __func__, on ? "on" : "off");
-
-	val = readl(iotg->base + CI_OTGSC);
-
-	/* OTGSC_INT_MASK doesn't contains 1msInt */
-	if (on) {
-		val = val | (OTGSC_INT_MASK);
-		writel(val, iotg->base + CI_OTGSC);
-	} else {
-		val = val & ~(OTGSC_INT_MASK);
-		writel(val, iotg->base + CI_OTGSC);
-	}
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-}
-
-/* set HAAR: Hardware Assist Auto-Reset */
-static void langwell_otg_HAAR(int on)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val;
-
-	dev_dbg(lnw->dev, "%s ---> %s\n", __func__, on ? "on" : "off");
-
-	val = readl(iotg->base + CI_OTGSC);
-	if (on)
-		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HAAR,
-					iotg->base + CI_OTGSC);
-	else
-		writel((val & ~OTGSC_INTSTS_MASK) & ~OTGSC_HAAR,
-					iotg->base + CI_OTGSC);
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-}
-
-/* set HABA: Hardware Assist B-Disconnect to A-Connect */
-static void langwell_otg_HABA(int on)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val;
-
-	dev_dbg(lnw->dev, "%s ---> %s\n", __func__, on ? "on" : "off");
-
-	val = readl(iotg->base + CI_OTGSC);
-	if (on)
-		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HABA,
-					iotg->base + CI_OTGSC);
-	else
-		writel((val & ~OTGSC_INTSTS_MASK) & ~OTGSC_HABA,
-					iotg->base + CI_OTGSC);
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-}
-
-static int langwell_otg_check_se0_srp(int on)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	int			delay_time = TB_SE0_SRP * 10;
-	u32			val;
-
-	dev_dbg(lnw->dev, "%s --->\n", __func__);
-
-	do {
-		udelay(100);
-		if (!delay_time--)
-			break;
-		val = readl(lnw->iotg.base + CI_PORTSC1);
-		val &= PORTSC_LS;
-	} while (!val);
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-	return val;
-}
-
-/* The timeout callback function to set time out bit */
-static void set_tmout(unsigned long indicator)
-{
-	*(int *)indicator = 1;
-}
-
-void langwell_otg_nsf_msg(unsigned long indicator)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-
-	switch (indicator) {
-	case 2:
-	case 4:
-	case 6:
-	case 7:
-		dev_warn(lnw->dev,
-			"OTG:NSF-%lu - deivce not responding\n", indicator);
-		break;
-	case 3:
-		dev_warn(lnw->dev,
-			"OTG:NSF-%lu - deivce not supported\n", indicator);
-		break;
-	default:
-		dev_warn(lnw->dev, "Do not have this kind of NSF\n");
-		break;
-	}
-}
-
-/* Initialize timers */
-static int langwell_otg_init_timers(struct otg_hsm *hsm)
-{
-	/* HSM used timers */
-	a_wait_vrise_tmr = otg_timer_initializer(&set_tmout, TA_WAIT_VRISE,
-				(unsigned long)&hsm->a_wait_vrise_tmout);
-	if (a_wait_vrise_tmr == NULL)
-		return -ENOMEM;
-	a_aidl_bdis_tmr = otg_timer_initializer(&set_tmout, TA_AIDL_BDIS,
-				(unsigned long)&hsm->a_aidl_bdis_tmout);
-	if (a_aidl_bdis_tmr == NULL)
-		return -ENOMEM;
-	b_se0_srp_tmr = otg_timer_initializer(&set_tmout, TB_SE0_SRP,
-				(unsigned long)&hsm->b_se0_srp);
-	if (b_se0_srp_tmr == NULL)
-		return -ENOMEM;
-	b_srp_init_tmr = otg_timer_initializer(&set_tmout, TB_SRP_INIT,
-				(unsigned long)&hsm->b_srp_init_tmout);
-	if (b_srp_init_tmr == NULL)
-		return -ENOMEM;
-
-	return 0;
-}
-
-/* Free timers */
-static void langwell_otg_free_timers(void)
-{
-	kfree(a_wait_vrise_tmr);
-	kfree(a_aidl_bdis_tmr);
-	kfree(b_se0_srp_tmr);
-	kfree(b_srp_init_tmr);
-}
-
-/* The timeout callback function to set time out bit */
-static void langwell_otg_timer_fn(unsigned long indicator)
-{
-	struct langwell_otg *lnw = the_transceiver;
-
-	*(int *)indicator = 1;
-
-	dev_dbg(lnw->dev, "kernel timer - timeout\n");
-
-	langwell_update_transceiver();
-}
-
-/* kernel timer used instead of HW based interrupt */
-static void langwell_otg_add_ktimer(enum langwell_otg_timer_type timers)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	unsigned long		j = jiffies;
-	unsigned long		data, time;
-
-	switch (timers) {
-	case TA_WAIT_VRISE_TMR:
-		iotg->hsm.a_wait_vrise_tmout = 0;
-		data = (unsigned long)&iotg->hsm.a_wait_vrise_tmout;
-		time = TA_WAIT_VRISE;
-		break;
-	case TA_WAIT_BCON_TMR:
-		iotg->hsm.a_wait_bcon_tmout = 0;
-		data = (unsigned long)&iotg->hsm.a_wait_bcon_tmout;
-		time = TA_WAIT_BCON;
-		break;
-	case TA_AIDL_BDIS_TMR:
-		iotg->hsm.a_aidl_bdis_tmout = 0;
-		data = (unsigned long)&iotg->hsm.a_aidl_bdis_tmout;
-		time = TA_AIDL_BDIS;
-		break;
-	case TB_ASE0_BRST_TMR:
-		iotg->hsm.b_ase0_brst_tmout = 0;
-		data = (unsigned long)&iotg->hsm.b_ase0_brst_tmout;
-		time = TB_ASE0_BRST;
-		break;
-	case TB_SRP_INIT_TMR:
-		iotg->hsm.b_srp_init_tmout = 0;
-		data = (unsigned long)&iotg->hsm.b_srp_init_tmout;
-		time = TB_SRP_INIT;
-		break;
-	case TB_SRP_FAIL_TMR:
-		iotg->hsm.b_srp_fail_tmout = 0;
-		data = (unsigned long)&iotg->hsm.b_srp_fail_tmout;
-		time = TB_SRP_FAIL;
-		break;
-	case TB_BUS_SUSPEND_TMR:
-		iotg->hsm.b_bus_suspend_tmout = 0;
-		data = (unsigned long)&iotg->hsm.b_bus_suspend_tmout;
-		time = TB_BUS_SUSPEND;
-		break;
-	default:
-		dev_dbg(lnw->dev, "unknown timer, cannot enable it\n");
-		return;
-	}
-
-	lnw->hsm_timer.data = data;
-	lnw->hsm_timer.function = langwell_otg_timer_fn;
-	lnw->hsm_timer.expires = j + time * HZ / 1000; /* milliseconds */
-
-	add_timer(&lnw->hsm_timer);
-
-	dev_dbg(lnw->dev, "add timer successfully\n");
-}
-
-/* Add timer to timer list */
-static void langwell_otg_add_timer(void *gtimer)
-{
-	struct langwell_otg_timer *timer = (struct langwell_otg_timer *)gtimer;
-	struct langwell_otg_timer *tmp_timer;
-	struct intel_mid_otg_xceiv *iotg = &the_transceiver->iotg;
-	u32	val32;
-
-	/* Check if the timer is already in the active list,
-	 * if so update timer count
-	 */
-	list_for_each_entry(tmp_timer, &active_timers, list)
-		if (tmp_timer == timer) {
-			timer->count = timer->expires;
-			return;
-		}
-	timer->count = timer->expires;
-
-	if (list_empty(&active_timers)) {
-		val32 = readl(iotg->base + CI_OTGSC);
-		writel(val32 | OTGSC_1MSE, iotg->base + CI_OTGSC);
-	}
-
-	list_add_tail(&timer->list, &active_timers);
-}
-
-/* Remove timer from the timer list; clear timeout status */
-static void langwell_otg_del_timer(void *gtimer)
-{
-	struct langwell_otg *lnw = the_transceiver;
-	struct langwell_otg_timer *timer = (struct langwell_otg_timer *)gtimer;
-	struct langwell_otg_timer *tmp_timer, *del_tmp;
-	u32 val32;
-
-	list_for_each_entry_safe(tmp_timer, del_tmp, &active_timers, list)
-		if (tmp_timer == timer)
-			list_del(&timer->list);
-
-	if (list_empty(&active_timers)) {
-		val32 = readl(lnw->iotg.base + CI_OTGSC);
-		writel(val32 & ~OTGSC_1MSE, lnw->iotg.base + CI_OTGSC);
-	}
-}
-
-/* Reduce timer count by 1, and find timeout conditions.*/
-static int langwell_otg_tick_timer(u32 *int_sts)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	struct langwell_otg_timer *tmp_timer, *del_tmp;
-	int expired = 0;
-
-	list_for_each_entry_safe(tmp_timer, del_tmp, &active_timers, list) {
-		tmp_timer->count--;
-		/* check if timer expires */
-		if (!tmp_timer->count) {
-			list_del(&tmp_timer->list);
-			tmp_timer->function(tmp_timer->data);
-			expired = 1;
-		}
-	}
-
-	if (list_empty(&active_timers)) {
-		dev_dbg(lnw->dev, "tick timer: disable 1ms int\n");
-		*int_sts = *int_sts & ~OTGSC_1MSE;
-	}
-	return expired;
-}
-
-static void reset_otg(void)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	int			delay_time = 1000;
-	u32			val;
-
-	dev_dbg(lnw->dev, "reseting OTG controller ...\n");
-	val = readl(lnw->iotg.base + CI_USBCMD);
-	writel(val | USBCMD_RST, lnw->iotg.base + CI_USBCMD);
-	do {
-		udelay(100);
-		if (!delay_time--)
-			dev_dbg(lnw->dev, "reset timeout\n");
-		val = readl(lnw->iotg.base + CI_USBCMD);
-		val &= USBCMD_RST;
-	} while (val != 0);
-	dev_dbg(lnw->dev, "reset done.\n");
-}
-
-static void set_host_mode(void)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	u32			val;
-
-	reset_otg();
-	val = readl(lnw->iotg.base + CI_USBMODE);
-	val = (val & (~USBMODE_CM)) | USBMODE_HOST;
-	writel(val, lnw->iotg.base + CI_USBMODE);
-}
-
-static void set_client_mode(void)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	u32			val;
-
-	reset_otg();
-	val = readl(lnw->iotg.base + CI_USBMODE);
-	val = (val & (~USBMODE_CM)) | USBMODE_DEVICE;
-	writel(val, lnw->iotg.base + CI_USBMODE);
-}
-
-static void init_hsm(void)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val32;
-
-	/* read OTGSC after reset */
-	val32 = readl(lnw->iotg.base + CI_OTGSC);
-	dev_dbg(lnw->dev, "%s: OTGSC init value = 0x%x\n", __func__, val32);
-
-	/* set init state */
-	if (val32 & OTGSC_ID) {
-		iotg->hsm.id = 1;
-		iotg->otg.default_a = 0;
-		set_client_mode();
-		iotg->otg.state = OTG_STATE_B_IDLE;
-	} else {
-		iotg->hsm.id = 0;
-		iotg->otg.default_a = 1;
-		set_host_mode();
-		iotg->otg.state = OTG_STATE_A_IDLE;
-	}
-
-	/* set session indicator */
-	if (val32 & OTGSC_BSE)
-		iotg->hsm.b_sess_end = 1;
-	if (val32 & OTGSC_BSV)
-		iotg->hsm.b_sess_vld = 1;
-	if (val32 & OTGSC_ASV)
-		iotg->hsm.a_sess_vld = 1;
-	if (val32 & OTGSC_AVV)
-		iotg->hsm.a_vbus_vld = 1;
-
-	/* defautly power the bus */
-	iotg->hsm.a_bus_req = 1;
-	iotg->hsm.a_bus_drop = 0;
-	/* defautly don't request bus as B device */
-	iotg->hsm.b_bus_req = 0;
-	/* no system error */
-	iotg->hsm.a_clr_err = 0;
-
-	langwell_otg_phy_low_power_wait(1);
-}
-
-static void update_hsm(void)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val32;
-
-	/* read OTGSC */
-	val32 = readl(lnw->iotg.base + CI_OTGSC);
-	dev_dbg(lnw->dev, "%s: OTGSC value = 0x%x\n", __func__, val32);
-
-	iotg->hsm.id = !!(val32 & OTGSC_ID);
-	iotg->hsm.b_sess_end = !!(val32 & OTGSC_BSE);
-	iotg->hsm.b_sess_vld = !!(val32 & OTGSC_BSV);
-	iotg->hsm.a_sess_vld = !!(val32 & OTGSC_ASV);
-	iotg->hsm.a_vbus_vld = !!(val32 & OTGSC_AVV);
-}
-
-static irqreturn_t otg_dummy_irq(int irq, void *_dev)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	void __iomem		*reg_base = _dev;
-	u32			val;
-	u32			int_mask = 0;
-
-	val = readl(reg_base + CI_USBMODE);
-	if ((val & USBMODE_CM) != USBMODE_DEVICE)
-		return IRQ_NONE;
-
-	val = readl(reg_base + CI_USBSTS);
-	int_mask = val & INTR_DUMMY_MASK;
-
-	if (int_mask == 0)
-		return IRQ_NONE;
-
-	/* clear hsm.b_conn here since host driver can't detect it
-	*  otg_dummy_irq called means B-disconnect happened.
-	*/
-	if (lnw->iotg.hsm.b_conn) {
-		lnw->iotg.hsm.b_conn = 0;
-		if (spin_trylock(&lnw->wq_lock)) {
-			langwell_update_transceiver();
-			spin_unlock(&lnw->wq_lock);
-		}
-	}
-
-	/* Clear interrupts */
-	writel(int_mask, reg_base + CI_USBSTS);
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t otg_irq(int irq, void *_dev)
-{
-	struct langwell_otg		*lnw = _dev;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				int_sts, int_en;
-	u32				int_mask = 0;
-	int				flag = 0;
-
-	int_sts = readl(lnw->iotg.base + CI_OTGSC);
-	int_en = (int_sts & OTGSC_INTEN_MASK) >> 8;
-	int_mask = int_sts & int_en;
-	if (int_mask == 0)
-		return IRQ_NONE;
-
-	if (int_mask & OTGSC_IDIS) {
-		dev_dbg(lnw->dev, "%s: id change int\n", __func__);
-		iotg->hsm.id = (int_sts & OTGSC_ID) ? 1 : 0;
-		dev_dbg(lnw->dev, "id = %d\n", iotg->hsm.id);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_DPIS) {
-		dev_dbg(lnw->dev, "%s: data pulse int\n", __func__);
-		iotg->hsm.a_srp_det = (int_sts & OTGSC_DPS) ? 1 : 0;
-		dev_dbg(lnw->dev, "data pulse = %d\n", iotg->hsm.a_srp_det);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_BSEIS) {
-		dev_dbg(lnw->dev, "%s: b session end int\n", __func__);
-		iotg->hsm.b_sess_end = (int_sts & OTGSC_BSE) ? 1 : 0;
-		dev_dbg(lnw->dev, "b_sess_end = %d\n", iotg->hsm.b_sess_end);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_BSVIS) {
-		dev_dbg(lnw->dev, "%s: b session valid int\n", __func__);
-		iotg->hsm.b_sess_vld = (int_sts & OTGSC_BSV) ? 1 : 0;
-		dev_dbg(lnw->dev, "b_sess_vld = %d\n", iotg->hsm.b_sess_end);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_ASVIS) {
-		dev_dbg(lnw->dev, "%s: a session valid int\n", __func__);
-		iotg->hsm.a_sess_vld = (int_sts & OTGSC_ASV) ? 1 : 0;
-		dev_dbg(lnw->dev, "a_sess_vld = %d\n", iotg->hsm.a_sess_vld);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_AVVIS) {
-		dev_dbg(lnw->dev, "%s: a vbus valid int\n", __func__);
-		iotg->hsm.a_vbus_vld = (int_sts & OTGSC_AVV) ? 1 : 0;
-		dev_dbg(lnw->dev, "a_vbus_vld = %d\n", iotg->hsm.a_vbus_vld);
-		flag = 1;
-	}
-
-	if (int_mask & OTGSC_1MSS) {
-		/* need to schedule otg_work if any timer is expired */
-		if (langwell_otg_tick_timer(&int_sts))
-			flag = 1;
-	}
-
-	writel((int_sts & ~OTGSC_INTSTS_MASK) | int_mask,
-					lnw->iotg.base + CI_OTGSC);
-	if (flag)
-		langwell_update_transceiver();
-
-	return IRQ_HANDLED;
-}
-
-static int langwell_otg_iotg_notify(struct notifier_block *nb,
-				unsigned long action, void *data)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = data;
-	int				flag = 0;
-
-	if (iotg == NULL)
-		return NOTIFY_BAD;
-
-	if (lnw == NULL)
-		return NOTIFY_BAD;
-
-	switch (action) {
-	case MID_OTG_NOTIFY_CONNECT:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Connect Event\n");
-		if (iotg->otg.default_a == 1)
-			iotg->hsm.b_conn = 1;
-		else
-			iotg->hsm.a_conn = 1;
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_DISCONN:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Disconnect Event\n");
-		if (iotg->otg.default_a == 1)
-			iotg->hsm.b_conn = 0;
-		else
-			iotg->hsm.a_conn = 0;
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_HSUSPEND:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Host Bus suspend Event\n");
-		if (iotg->otg.default_a == 1)
-			iotg->hsm.a_suspend_req = 1;
-		else
-			iotg->hsm.b_bus_req = 0;
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_HRESUME:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Host Bus resume Event\n");
-		if (iotg->otg.default_a == 1)
-			iotg->hsm.b_bus_resume = 1;
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_CSUSPEND:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Client Bus suspend Event\n");
-		if (iotg->otg.default_a == 1) {
-			if (iotg->hsm.b_bus_suspend_vld == 2) {
-				iotg->hsm.b_bus_suspend = 1;
-				iotg->hsm.b_bus_suspend_vld = 0;
-				flag = 1;
-			} else {
-				iotg->hsm.b_bus_suspend_vld++;
-				flag = 0;
-			}
-		} else {
-			if (iotg->hsm.a_bus_suspend == 0) {
-				iotg->hsm.a_bus_suspend = 1;
-				flag = 1;
-			}
-		}
-		break;
-	case MID_OTG_NOTIFY_CRESUME:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Client Bus resume Event\n");
-		if (iotg->otg.default_a == 0)
-			iotg->hsm.a_bus_suspend = 0;
-		flag = 0;
-		break;
-	case MID_OTG_NOTIFY_HOSTADD:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity Host Driver Add\n");
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_HOSTREMOVE:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity Host Driver remove\n");
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_CLIENTADD:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity Client Driver Add\n");
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_CLIENTREMOVE:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity Client Driver remove\n");
-		flag = 1;
-		break;
-	default:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity unknown notify message\n");
-		return NOTIFY_DONE;
-	}
-
-	if (flag)
-		langwell_update_transceiver();
-
-	return NOTIFY_OK;
-}
-
-static void langwell_otg_work(struct work_struct *work)
-{
-	struct langwell_otg		*lnw;
-	struct intel_mid_otg_xceiv	*iotg;
-	int				retval;
-	struct pci_dev			*pdev;
-
-	lnw = container_of(work, struct langwell_otg, work);
-	iotg = &lnw->iotg;
-	pdev = to_pci_dev(lnw->dev);
-
-	dev_dbg(lnw->dev, "%s: old state = %s\n", __func__,
-			otg_state_string(iotg->otg.state));
-
-	switch (iotg->otg.state) {
-	case OTG_STATE_UNDEFINED:
-	case OTG_STATE_B_IDLE:
-		if (!iotg->hsm.id) {
-			langwell_otg_del_timer(b_srp_init_tmr);
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			langwell_otg_chrg_vbus(0);
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.b_sess_vld) {
-			langwell_otg_del_timer(b_srp_init_tmr);
-			del_timer_sync(&lnw->hsm_timer);
-			iotg->hsm.b_sess_end = 0;
-			iotg->hsm.a_bus_suspend = 0;
-			langwell_otg_chrg_vbus(0);
-
-			if (lnw->iotg.start_peripheral) {
-				lnw->iotg.start_peripheral(&lnw->iotg);
-				iotg->otg.state = OTG_STATE_B_PERIPHERAL;
-			} else
-				dev_dbg(lnw->dev, "client driver not loaded\n");
-
-		} else if (iotg->hsm.b_srp_init_tmout) {
-			iotg->hsm.b_srp_init_tmout = 0;
-			dev_warn(lnw->dev, "SRP init timeout\n");
-		} else if (iotg->hsm.b_srp_fail_tmout) {
-			iotg->hsm.b_srp_fail_tmout = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			/* No silence failure */
-			langwell_otg_nsf_msg(6);
-		} else if (iotg->hsm.b_bus_req && iotg->hsm.b_sess_end) {
-			del_timer_sync(&lnw->hsm_timer);
-			/* workaround for b_se0_srp detection */
-			retval = langwell_otg_check_se0_srp(0);
-			if (retval) {
-				iotg->hsm.b_bus_req = 0;
-				dev_dbg(lnw->dev, "LS isn't SE0, try later\n");
-			} else {
-				/* clear the PHCD before start srp */
-				langwell_otg_phy_low_power(0);
-
-				/* Start SRP */
-				langwell_otg_add_timer(b_srp_init_tmr);
-				iotg->otg.start_srp(&iotg->otg);
-				langwell_otg_del_timer(b_srp_init_tmr);
-				langwell_otg_add_ktimer(TB_SRP_FAIL_TMR);
-
-				/* reset PHY low power mode here */
-				langwell_otg_phy_low_power_wait(1);
-			}
-		}
-		break;
-	case OTG_STATE_B_SRP_INIT:
-		if (!iotg->hsm.id) {
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_chrg_vbus(0);
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.b_sess_vld) {
-			langwell_otg_chrg_vbus(0);
-			if (lnw->iotg.start_peripheral) {
-				lnw->iotg.start_peripheral(&lnw->iotg);
-				iotg->otg.state = OTG_STATE_B_PERIPHERAL;
-			} else
-				dev_dbg(lnw->dev, "client driver not loaded\n");
-		}
-		break;
-	case OTG_STATE_B_PERIPHERAL:
-		if (!iotg->hsm.id) {
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			langwell_otg_chrg_vbus(0);
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.b_sess_vld) {
-			iotg->hsm.b_hnp_enable = 0;
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			iotg->otg.state = OTG_STATE_B_IDLE;
-		} else if (iotg->hsm.b_bus_req && iotg->otg.gadget &&
-					iotg->otg.gadget->b_hnp_enable &&
-					iotg->hsm.a_bus_suspend) {
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			langwell_otg_HAAR(1);
-			iotg->hsm.a_conn = 0;
-
-			if (lnw->iotg.start_host) {
-				lnw->iotg.start_host(&lnw->iotg);
-				iotg->otg.state = OTG_STATE_B_WAIT_ACON;
-			} else
-				dev_dbg(lnw->dev,
-						"host driver not loaded.\n");
-
-			iotg->hsm.a_bus_resume = 0;
-			langwell_otg_add_ktimer(TB_ASE0_BRST_TMR);
-		}
-		break;
-
-	case OTG_STATE_B_WAIT_ACON:
-		if (!iotg->hsm.id) {
-			/* delete hsm timer for b_ase0_brst_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			langwell_otg_chrg_vbus(0);
-
-			langwell_otg_HAAR(0);
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.b_sess_vld) {
-			/* delete hsm timer for b_ase0_brst_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->hsm.b_hnp_enable = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			langwell_otg_chrg_vbus(0);
-			langwell_otg_HAAR(0);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-		} else if (iotg->hsm.a_conn) {
-			/* delete hsm timer for b_ase0_brst_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			langwell_otg_HAAR(0);
-			iotg->otg.state = OTG_STATE_B_HOST;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_bus_resume ||
-				iotg->hsm.b_ase0_brst_tmout) {
-			/* delete hsm timer for b_ase0_brst_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			langwell_otg_HAAR(0);
-			langwell_otg_nsf_msg(7);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			iotg->hsm.a_bus_suspend = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			if (lnw->iotg.start_peripheral)
-				lnw->iotg.start_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver not loaded.\n");
-
-			iotg->otg.state = OTG_STATE_B_PERIPHERAL;
-		}
-		break;
-
-	case OTG_STATE_B_HOST:
-		if (!iotg->hsm.id) {
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			langwell_otg_chrg_vbus(0);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.b_sess_vld) {
-			iotg->hsm.b_hnp_enable = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			langwell_otg_chrg_vbus(0);
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-		} else if ((!iotg->hsm.b_bus_req) ||
-				(!iotg->hsm.a_conn)) {
-			iotg->hsm.b_bus_req = 0;
-			langwell_otg_loc_sof(0);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			iotg->hsm.a_bus_suspend = 0;
-
-			if (lnw->iotg.start_peripheral)
-				lnw->iotg.start_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-						"client driver not loaded.\n");
-
-			iotg->otg.state = OTG_STATE_B_PERIPHERAL;
-		}
-		break;
-
-	case OTG_STATE_A_IDLE:
-		iotg->otg.default_a = 1;
-		if (iotg->hsm.id) {
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-			iotg->hsm.vbus_srp_up = 0;
-
-			langwell_otg_chrg_vbus(0);
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_bus_drop &&
-			(iotg->hsm.a_srp_det || iotg->hsm.a_bus_req)) {
-			langwell_otg_phy_low_power(0);
-
-			/* Turn on VBus */
-			iotg->otg.set_vbus(&iotg->otg, true);
-
-			iotg->hsm.vbus_srp_up = 0;
-			iotg->hsm.a_wait_vrise_tmout = 0;
-			langwell_otg_add_timer(a_wait_vrise_tmr);
-			iotg->otg.state = OTG_STATE_A_WAIT_VRISE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_bus_drop && iotg->hsm.a_sess_vld) {
-			iotg->hsm.vbus_srp_up = 1;
-		} else if (!iotg->hsm.a_sess_vld && iotg->hsm.vbus_srp_up) {
-			msleep(10);
-			langwell_otg_phy_low_power(0);
-
-			/* Turn on VBus */
-			iotg->otg.set_vbus(&iotg->otg, true);
-			iotg->hsm.a_srp_det = 1;
-			iotg->hsm.vbus_srp_up = 0;
-			iotg->hsm.a_wait_vrise_tmout = 0;
-			langwell_otg_add_timer(a_wait_vrise_tmr);
-			iotg->otg.state = OTG_STATE_A_WAIT_VRISE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_sess_vld &&
-				!iotg->hsm.vbus_srp_up) {
-			langwell_otg_phy_low_power(1);
-		}
-		break;
-	case OTG_STATE_A_WAIT_VRISE:
-		if (iotg->hsm.id) {
-			langwell_otg_del_timer(a_wait_vrise_tmr);
-			iotg->hsm.b_bus_req = 0;
-			iotg->otg.default_a = 0;
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-		} else if (iotg->hsm.a_vbus_vld) {
-			langwell_otg_del_timer(a_wait_vrise_tmr);
-			iotg->hsm.b_conn = 0;
-			if (lnw->iotg.start_host)
-				lnw->iotg.start_host(&lnw->iotg);
-			else {
-				dev_dbg(lnw->dev, "host driver not loaded.\n");
-				break;
-			}
-
-			langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-			iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-		} else if (iotg->hsm.a_wait_vrise_tmout) {
-			iotg->hsm.b_conn = 0;
-			if (iotg->hsm.a_vbus_vld) {
-				if (lnw->iotg.start_host)
-					lnw->iotg.start_host(&lnw->iotg);
-				else {
-					dev_dbg(lnw->dev,
-						"host driver not loaded.\n");
-					break;
-				}
-				langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-				iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-			} else {
-
-				/* Turn off VBus */
-				iotg->otg.set_vbus(&iotg->otg, false);
-				langwell_otg_phy_low_power_wait(1);
-				iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-			}
-		}
-		break;
-	case OTG_STATE_A_WAIT_BCON:
-		if (iotg->hsm.id) {
-			/* delete hsm timer for a_wait_bcon_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_vbus_vld) {
-			/* delete hsm timer for a_wait_bcon_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-		} else if (iotg->hsm.a_bus_drop ||
-				(iotg->hsm.a_wait_bcon_tmout &&
-				!iotg->hsm.a_bus_req)) {
-			/* delete hsm timer for a_wait_bcon_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-		} else if (iotg->hsm.b_conn) {
-			/* delete hsm timer for a_wait_bcon_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->hsm.a_suspend_req = 0;
-			iotg->otg.state = OTG_STATE_A_HOST;
-			if (iotg->hsm.a_srp_det && iotg->otg.host &&
-					!iotg->otg.host->b_hnp_enable) {
-				/* SRP capable peripheral-only device */
-				iotg->hsm.a_bus_req = 1;
-				iotg->hsm.a_srp_det = 0;
-			} else if (!iotg->hsm.a_bus_req && iotg->otg.host &&
-					iotg->otg.host->b_hnp_enable) {
-				/* It is not safe enough to do a fast
-				 * transition from A_WAIT_BCON to
-				 * A_SUSPEND */
-				msleep(10000);
-				if (iotg->hsm.a_bus_req)
-					break;
-
-				if (request_irq(pdev->irq,
-					otg_dummy_irq, IRQF_SHARED,
-					driver_name, iotg->base) != 0) {
-					dev_dbg(lnw->dev,
-						"request interrupt %d fail\n",
-						pdev->irq);
-				}
-
-				langwell_otg_HABA(1);
-				iotg->hsm.b_bus_resume = 0;
-				iotg->hsm.a_aidl_bdis_tmout = 0;
-
-				langwell_otg_loc_sof(0);
-				/* clear PHCD to enable HW timer */
-				langwell_otg_phy_low_power(0);
-				langwell_otg_add_timer(a_aidl_bdis_tmr);
-				iotg->otg.state = OTG_STATE_A_SUSPEND;
-			} else if (!iotg->hsm.a_bus_req && iotg->otg.host &&
-				!iotg->otg.host->b_hnp_enable) {
-				if (lnw->iotg.stop_host)
-					lnw->iotg.stop_host(&lnw->iotg);
-				else
-					dev_dbg(lnw->dev,
-						"host driver removed.\n");
-
-				/* Turn off VBus */
-				iotg->otg.set_vbus(&iotg->otg, false);
-				iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-			}
-		}
-		break;
-	case OTG_STATE_A_HOST:
-		if (iotg->hsm.id) {
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_bus_drop ||
-				(iotg->otg.host &&
-				!iotg->otg.host->b_hnp_enable &&
-					!iotg->hsm.a_bus_req)) {
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-		} else if (!iotg->hsm.a_vbus_vld) {
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-		} else if (iotg->otg.host &&
-				iotg->otg.host->b_hnp_enable &&
-				!iotg->hsm.a_bus_req) {
-			/* Set HABA to enable hardware assistance to signal
-			 *  A-connect after receiver B-disconnect. Hardware
-			 *  will then set client mode and enable URE, SLE and
-			 *  PCE after the assistance. otg_dummy_irq is used to
-			 *  clean these ints when client driver is not resumed.
-			 */
-			if (request_irq(pdev->irq, otg_dummy_irq, IRQF_SHARED,
-					driver_name, iotg->base) != 0) {
-				dev_dbg(lnw->dev,
-					"request interrupt %d failed\n",
-						pdev->irq);
-			}
-
-			/* set HABA */
-			langwell_otg_HABA(1);
-			iotg->hsm.b_bus_resume = 0;
-			iotg->hsm.a_aidl_bdis_tmout = 0;
-			langwell_otg_loc_sof(0);
-			/* clear PHCD to enable HW timer */
-			langwell_otg_phy_low_power(0);
-			langwell_otg_add_timer(a_aidl_bdis_tmr);
-			iotg->otg.state = OTG_STATE_A_SUSPEND;
-		} else if (!iotg->hsm.b_conn || !iotg->hsm.a_bus_req) {
-			langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-			iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-		}
-		break;
-	case OTG_STATE_A_SUSPEND:
-		if (iotg->hsm.id) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_bus_req ||
-				iotg->hsm.b_bus_resume) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-			iotg->hsm.a_suspend_req = 0;
-			langwell_otg_loc_sof(1);
-			iotg->otg.state = OTG_STATE_A_HOST;
-		} else if (iotg->hsm.a_aidl_bdis_tmout ||
-				iotg->hsm.a_bus_drop) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-		} else if (!iotg->hsm.b_conn && iotg->otg.host &&
-				iotg->otg.host->b_hnp_enable) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			iotg->hsm.b_bus_suspend = 0;
-			iotg->hsm.b_bus_suspend_vld = 0;
-
-			/* msleep(200); */
-			if (lnw->iotg.start_peripheral)
-				lnw->iotg.start_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver not loaded.\n");
-
-			langwell_otg_add_ktimer(TB_BUS_SUSPEND_TMR);
-			iotg->otg.state = OTG_STATE_A_PERIPHERAL;
-			break;
-		} else if (!iotg->hsm.a_vbus_vld) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-		}
-		break;
-	case OTG_STATE_A_PERIPHERAL:
-		if (iotg->hsm.id) {
-			/* delete hsm timer for b_bus_suspend_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_vbus_vld) {
-			/* delete hsm timer for b_bus_suspend_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-		} else if (iotg->hsm.a_bus_drop) {
-			/* delete hsm timer for b_bus_suspend_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-		} else if (iotg->hsm.b_bus_suspend) {
-			/* delete hsm timer for b_bus_suspend_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			if (lnw->iotg.start_host)
-				lnw->iotg.start_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-						"host driver not loaded.\n");
-			langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-			iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-		} else if (iotg->hsm.b_bus_suspend_tmout) {
-			u32	val;
-			val = readl(lnw->iotg.base + CI_PORTSC1);
-			if (!(val & PORTSC_SUSP))
-				break;
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			if (lnw->iotg.start_host)
-				lnw->iotg.start_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-						"host driver not loaded.\n");
-			langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-			iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-		}
-		break;
-	case OTG_STATE_A_VBUS_ERR:
-		if (iotg->hsm.id) {
-			iotg->otg.default_a = 0;
-			iotg->hsm.a_clr_err = 0;
-			iotg->hsm.a_srp_det = 0;
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_clr_err) {
-			iotg->hsm.a_clr_err = 0;
-			iotg->hsm.a_srp_det = 0;
-			reset_otg();
-			init_hsm();
-			if (iotg->otg.state == OTG_STATE_A_IDLE)
-				langwell_update_transceiver();
-		} else {
-			/* FW will clear PHCD bit when any VBus
-			 * event detected. Reset PHCD to 1 again */
-			langwell_otg_phy_low_power(1);
-		}
-		break;
-	case OTG_STATE_A_WAIT_VFALL:
-		if (iotg->hsm.id) {
-			iotg->otg.default_a = 0;
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_bus_req) {
-
-			/* Turn on VBus */
-			iotg->otg.set_vbus(&iotg->otg, true);
-			iotg->hsm.a_wait_vrise_tmout = 0;
-			langwell_otg_add_timer(a_wait_vrise_tmr);
-			iotg->otg.state = OTG_STATE_A_WAIT_VRISE;
-		} else if (!iotg->hsm.a_sess_vld) {
-			iotg->hsm.a_srp_det = 0;
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-		}
-		break;
-	default:
-		;
-	}
-
-	dev_dbg(lnw->dev, "%s: new state = %s\n", __func__,
-			otg_state_string(iotg->otg.state));
-}
-
-static ssize_t
-show_registers(struct device *_dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	char			*next;
-	unsigned		size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	t = scnprintf(next, size,
-		"\n"
-		"USBCMD = 0x%08x\n"
-		"USBSTS = 0x%08x\n"
-		"USBINTR = 0x%08x\n"
-		"ASYNCLISTADDR = 0x%08x\n"
-		"PORTSC1 = 0x%08x\n"
-		"HOSTPC1 = 0x%08x\n"
-		"OTGSC = 0x%08x\n"
-		"USBMODE = 0x%08x\n",
-		readl(lnw->iotg.base + 0x30),
-		readl(lnw->iotg.base + 0x34),
-		readl(lnw->iotg.base + 0x38),
-		readl(lnw->iotg.base + 0x48),
-		readl(lnw->iotg.base + 0x74),
-		readl(lnw->iotg.base + 0xb4),
-		readl(lnw->iotg.base + 0xf4),
-		readl(lnw->iotg.base + 0xf8)
-	     );
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-static DEVICE_ATTR(registers, S_IRUGO, show_registers, NULL);
-
-static ssize_t
-show_hsm(struct device *_dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	char				*next;
-	unsigned			size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	if (iotg->otg.host)
-		iotg->hsm.a_set_b_hnp_en = iotg->otg.host->b_hnp_enable;
-
-	if (iotg->otg.gadget)
-		iotg->hsm.b_hnp_enable = iotg->otg.gadget->b_hnp_enable;
-
-	t = scnprintf(next, size,
-		"\n"
-		"current state = %s\n"
-		"a_bus_resume = \t%d\n"
-		"a_bus_suspend = \t%d\n"
-		"a_conn = \t%d\n"
-		"a_sess_vld = \t%d\n"
-		"a_srp_det = \t%d\n"
-		"a_vbus_vld = \t%d\n"
-		"b_bus_resume = \t%d\n"
-		"b_bus_suspend = \t%d\n"
-		"b_conn = \t%d\n"
-		"b_se0_srp = \t%d\n"
-		"b_sess_end = \t%d\n"
-		"b_sess_vld = \t%d\n"
-		"id = \t%d\n"
-		"a_set_b_hnp_en = \t%d\n"
-		"b_srp_done = \t%d\n"
-		"b_hnp_enable = \t%d\n"
-		"a_wait_vrise_tmout = \t%d\n"
-		"a_wait_bcon_tmout = \t%d\n"
-		"a_aidl_bdis_tmout = \t%d\n"
-		"b_ase0_brst_tmout = \t%d\n"
-		"a_bus_drop = \t%d\n"
-		"a_bus_req = \t%d\n"
-		"a_clr_err = \t%d\n"
-		"a_suspend_req = \t%d\n"
-		"b_bus_req = \t%d\n"
-		"b_bus_suspend_tmout = \t%d\n"
-		"b_bus_suspend_vld = \t%d\n",
-		otg_state_string(iotg->otg.state),
-		iotg->hsm.a_bus_resume,
-		iotg->hsm.a_bus_suspend,
-		iotg->hsm.a_conn,
-		iotg->hsm.a_sess_vld,
-		iotg->hsm.a_srp_det,
-		iotg->hsm.a_vbus_vld,
-		iotg->hsm.b_bus_resume,
-		iotg->hsm.b_bus_suspend,
-		iotg->hsm.b_conn,
-		iotg->hsm.b_se0_srp,
-		iotg->hsm.b_sess_end,
-		iotg->hsm.b_sess_vld,
-		iotg->hsm.id,
-		iotg->hsm.a_set_b_hnp_en,
-		iotg->hsm.b_srp_done,
-		iotg->hsm.b_hnp_enable,
-		iotg->hsm.a_wait_vrise_tmout,
-		iotg->hsm.a_wait_bcon_tmout,
-		iotg->hsm.a_aidl_bdis_tmout,
-		iotg->hsm.b_ase0_brst_tmout,
-		iotg->hsm.a_bus_drop,
-		iotg->hsm.a_bus_req,
-		iotg->hsm.a_clr_err,
-		iotg->hsm.a_suspend_req,
-		iotg->hsm.b_bus_req,
-		iotg->hsm.b_bus_suspend_tmout,
-		iotg->hsm.b_bus_suspend_vld
-		);
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-static DEVICE_ATTR(hsm, S_IRUGO, show_hsm, NULL);
-
-static ssize_t
-get_a_bus_req(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	char			*next;
-	unsigned		size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	t = scnprintf(next, size, "%d", lnw->iotg.hsm.a_bus_req);
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-
-static ssize_t
-set_a_bus_req(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-
-	if (!iotg->otg.default_a)
-		return -1;
-	if (count > 2)
-		return -1;
-
-	if (buf[0] == '0') {
-		iotg->hsm.a_bus_req = 0;
-		dev_dbg(lnw->dev, "User request: a_bus_req = 0\n");
-	} else if (buf[0] == '1') {
-		/* If a_bus_drop is TRUE, a_bus_req can't be set */
-		if (iotg->hsm.a_bus_drop)
-			return -1;
-		iotg->hsm.a_bus_req = 1;
-		dev_dbg(lnw->dev, "User request: a_bus_req = 1\n");
-	}
-	if (spin_trylock(&lnw->wq_lock)) {
-		langwell_update_transceiver();
-		spin_unlock(&lnw->wq_lock);
-	}
-	return count;
-}
-static DEVICE_ATTR(a_bus_req, S_IRUGO | S_IWUSR, get_a_bus_req, set_a_bus_req);
-
-static ssize_t
-get_a_bus_drop(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	char			*next;
-	unsigned		size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	t = scnprintf(next, size, "%d", lnw->iotg.hsm.a_bus_drop);
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-
-static ssize_t
-set_a_bus_drop(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-
-	if (!iotg->otg.default_a)
-		return -1;
-	if (count > 2)
-		return -1;
-
-	if (buf[0] == '0') {
-		iotg->hsm.a_bus_drop = 0;
-		dev_dbg(lnw->dev, "User request: a_bus_drop = 0\n");
-	} else if (buf[0] == '1') {
-		iotg->hsm.a_bus_drop = 1;
-		iotg->hsm.a_bus_req = 0;
-		dev_dbg(lnw->dev, "User request: a_bus_drop = 1\n");
-		dev_dbg(lnw->dev, "User request: and a_bus_req = 0\n");
-	}
-	if (spin_trylock(&lnw->wq_lock)) {
-		langwell_update_transceiver();
-		spin_unlock(&lnw->wq_lock);
-	}
-	return count;
-}
-static DEVICE_ATTR(a_bus_drop, S_IRUGO | S_IWUSR, get_a_bus_drop, set_a_bus_drop);
-
-static ssize_t
-get_b_bus_req(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	char			*next;
-	unsigned		size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	t = scnprintf(next, size, "%d", lnw->iotg.hsm.b_bus_req);
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-
-static ssize_t
-set_b_bus_req(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-
-	if (iotg->otg.default_a)
-		return -1;
-
-	if (count > 2)
-		return -1;
-
-	if (buf[0] == '0') {
-		iotg->hsm.b_bus_req = 0;
-		dev_dbg(lnw->dev, "User request: b_bus_req = 0\n");
-	} else if (buf[0] == '1') {
-		iotg->hsm.b_bus_req = 1;
-		dev_dbg(lnw->dev, "User request: b_bus_req = 1\n");
-	}
-	if (spin_trylock(&lnw->wq_lock)) {
-		langwell_update_transceiver();
-		spin_unlock(&lnw->wq_lock);
-	}
-	return count;
-}
-static DEVICE_ATTR(b_bus_req, S_IRUGO | S_IWUSR, get_b_bus_req, set_b_bus_req);
-
-static ssize_t
-set_a_clr_err(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-
-	if (!iotg->otg.default_a)
-		return -1;
-	if (count > 2)
-		return -1;
-
-	if (buf[0] == '1') {
-		iotg->hsm.a_clr_err = 1;
-		dev_dbg(lnw->dev, "User request: a_clr_err = 1\n");
-	}
-	if (spin_trylock(&lnw->wq_lock)) {
-		langwell_update_transceiver();
-		spin_unlock(&lnw->wq_lock);
-	}
-	return count;
-}
-static DEVICE_ATTR(a_clr_err, S_IWUSR, NULL, set_a_clr_err);
-
-static struct attribute *inputs_attrs[] = {
-	&dev_attr_a_bus_req.attr,
-	&dev_attr_a_bus_drop.attr,
-	&dev_attr_b_bus_req.attr,
-	&dev_attr_a_clr_err.attr,
-	NULL,
-};
-
-static struct attribute_group debug_dev_attr_group = {
-	.name = "inputs",
-	.attrs = inputs_attrs,
-};
-
-static int langwell_otg_probe(struct pci_dev *pdev,
-		const struct pci_device_id *id)
-{
-	unsigned long		resource, len;
-	void __iomem		*base = NULL;
-	int			retval;
-	u32			val32;
-	struct langwell_otg	*lnw;
-	char			qname[] = "langwell_otg_queue";
-
-	retval = 0;
-	dev_dbg(&pdev->dev, "\notg controller is detected.\n");
-	if (pci_enable_device(pdev) < 0) {
-		retval = -ENODEV;
-		goto done;
-	}
-
-	lnw = kzalloc(sizeof *lnw, GFP_KERNEL);
-	if (lnw == NULL) {
-		retval = -ENOMEM;
-		goto done;
-	}
-	the_transceiver = lnw;
-
-	/* control register: BAR 0 */
-	resource = pci_resource_start(pdev, 0);
-	len = pci_resource_len(pdev, 0);
-	if (!request_mem_region(resource, len, driver_name)) {
-		retval = -EBUSY;
-		goto err;
-	}
-	lnw->region = 1;
-
-	base = ioremap_nocache(resource, len);
-	if (base == NULL) {
-		retval = -EFAULT;
-		goto err;
-	}
-	lnw->iotg.base = base;
-
-	if (!request_mem_region(USBCFG_ADDR, USBCFG_LEN, driver_name)) {
-		retval = -EBUSY;
-		goto err;
-	}
-	lnw->cfg_region = 1;
-
-	/* For the SCCB.USBCFG register */
-	base = ioremap_nocache(USBCFG_ADDR, USBCFG_LEN);
-	if (base == NULL) {
-		retval = -EFAULT;
-		goto err;
-	}
-	lnw->usbcfg = base;
-
-	if (!pdev->irq) {
-		dev_dbg(&pdev->dev, "No IRQ.\n");
-		retval = -ENODEV;
-		goto err;
-	}
-
-	lnw->qwork = create_singlethread_workqueue(qname);
-	if (!lnw->qwork) {
-		dev_dbg(&pdev->dev, "cannot create workqueue %s\n", qname);
-		retval = -ENOMEM;
-		goto err;
-	}
-	INIT_WORK(&lnw->work, langwell_otg_work);
-
-	/* OTG common part */
-	lnw->dev = &pdev->dev;
-	lnw->iotg.otg.dev = lnw->dev;
-	lnw->iotg.otg.label = driver_name;
-	lnw->iotg.otg.set_host = langwell_otg_set_host;
-	lnw->iotg.otg.set_peripheral = langwell_otg_set_peripheral;
-	lnw->iotg.otg.set_power = langwell_otg_set_power;
-	lnw->iotg.otg.set_vbus = langwell_otg_set_vbus;
-	lnw->iotg.otg.start_srp = langwell_otg_start_srp;
-	lnw->iotg.otg.state = OTG_STATE_UNDEFINED;
-
-	if (otg_set_transceiver(&lnw->iotg.otg)) {
-		dev_dbg(lnw->dev, "can't set transceiver\n");
-		retval = -EBUSY;
-		goto err;
-	}
-
-	reset_otg();
-	init_hsm();
-
-	spin_lock_init(&lnw->lock);
-	spin_lock_init(&lnw->wq_lock);
-	INIT_LIST_HEAD(&active_timers);
-	retval = langwell_otg_init_timers(&lnw->iotg.hsm);
-	if (retval) {
-		dev_dbg(&pdev->dev, "Failed to init timers\n");
-		goto err;
-	}
-
-	init_timer(&lnw->hsm_timer);
-	ATOMIC_INIT_NOTIFIER_HEAD(&lnw->iotg.iotg_notifier);
-
-	lnw->iotg_notifier.notifier_call = langwell_otg_iotg_notify;
-
-	retval = intel_mid_otg_register_notifier(&lnw->iotg,
-						&lnw->iotg_notifier);
-	if (retval) {
-		dev_dbg(lnw->dev, "Failed to register notifier\n");
-		goto err;
-	}
-
-	if (request_irq(pdev->irq, otg_irq, IRQF_SHARED,
-				driver_name, lnw) != 0) {
-		dev_dbg(lnw->dev, "request interrupt %d failed\n", pdev->irq);
-		retval = -EBUSY;
-		goto err;
-	}
-
-	/* enable OTGSC int */
-	val32 = OTGSC_DPIE | OTGSC_BSEIE | OTGSC_BSVIE |
-		OTGSC_ASVIE | OTGSC_AVVIE | OTGSC_IDIE | OTGSC_IDPU;
-	writel(val32, lnw->iotg.base + CI_OTGSC);
-
-	retval = device_create_file(&pdev->dev, &dev_attr_registers);
-	if (retval < 0) {
-		dev_dbg(lnw->dev,
-			"Can't register sysfs attribute: %d\n", retval);
-		goto err;
-	}
-
-	retval = device_create_file(&pdev->dev, &dev_attr_hsm);
-	if (retval < 0) {
-		dev_dbg(lnw->dev, "Can't hsm sysfs attribute: %d\n", retval);
-		goto err;
-	}
-
-	retval = sysfs_create_group(&pdev->dev.kobj, &debug_dev_attr_group);
-	if (retval < 0) {
-		dev_dbg(lnw->dev,
-			"Can't register sysfs attr group: %d\n", retval);
-		goto err;
-	}
-
-	if (lnw->iotg.otg.state == OTG_STATE_A_IDLE)
-		langwell_update_transceiver();
-
-	return 0;
-
-err:
-	if (the_transceiver)
-		langwell_otg_remove(pdev);
-done:
-	return retval;
-}
-
-static void langwell_otg_remove(struct pci_dev *pdev)
-{
-	struct langwell_otg *lnw = the_transceiver;
-
-	if (lnw->qwork) {
-		flush_workqueue(lnw->qwork);
-		destroy_workqueue(lnw->qwork);
-	}
-	intel_mid_otg_unregister_notifier(&lnw->iotg, &lnw->iotg_notifier);
-	langwell_otg_free_timers();
-
-	/* disable OTGSC interrupt as OTGSC doesn't change in reset */
-	writel(0, lnw->iotg.base + CI_OTGSC);
-
-	if (pdev->irq)
-		free_irq(pdev->irq, lnw);
-	if (lnw->usbcfg)
-		iounmap(lnw->usbcfg);
-	if (lnw->cfg_region)
-		release_mem_region(USBCFG_ADDR, USBCFG_LEN);
-	if (lnw->iotg.base)
-		iounmap(lnw->iotg.base);
-	if (lnw->region)
-		release_mem_region(pci_resource_start(pdev, 0),
-				pci_resource_len(pdev, 0));
-
-	otg_set_transceiver(NULL);
-	pci_disable_device(pdev);
-	sysfs_remove_group(&pdev->dev.kobj, &debug_dev_attr_group);
-	device_remove_file(&pdev->dev, &dev_attr_hsm);
-	device_remove_file(&pdev->dev, &dev_attr_registers);
-	kfree(lnw);
-	lnw = NULL;
-}
-
-static void transceiver_suspend(struct pci_dev *pdev)
-{
-	pci_save_state(pdev);
-	pci_set_power_state(pdev, PCI_D3hot);
-	langwell_otg_phy_low_power(1);
-}
-
-static int langwell_otg_suspend(struct pci_dev *pdev, pm_message_t message)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	int				ret = 0;
-
-	/* Disbale OTG interrupts */
-	langwell_otg_intr(0);
-
-	if (pdev->irq)
-		free_irq(pdev->irq, lnw);
-
-	/* Prevent more otg_work */
-	flush_workqueue(lnw->qwork);
-	destroy_workqueue(lnw->qwork);
-	lnw->qwork = NULL;
-
-	/* start actions */
-	switch (iotg->otg.state) {
-	case OTG_STATE_A_WAIT_VFALL:
-		iotg->otg.state = OTG_STATE_A_IDLE;
-	case OTG_STATE_A_IDLE:
-	case OTG_STATE_B_IDLE:
-	case OTG_STATE_A_VBUS_ERR:
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_WAIT_VRISE:
-		langwell_otg_del_timer(a_wait_vrise_tmr);
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_WAIT_BCON:
-		del_timer_sync(&lnw->hsm_timer);
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev, "host driver has been removed.\n");
-
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_HOST:
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev, "host driver has been removed.\n");
-
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_SUSPEND:
-		langwell_otg_del_timer(a_aidl_bdis_tmr);
-		langwell_otg_HABA(0);
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(lnw->dev, "host driver has been removed.\n");
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_PERIPHERAL:
-		del_timer_sync(&lnw->hsm_timer);
-
-		if (lnw->iotg.stop_peripheral)
-			lnw->iotg.stop_peripheral(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev,
-				"client driver has been removed.\n");
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_B_HOST:
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev, "host driver has been removed.\n");
-		iotg->hsm.b_bus_req = 0;
-		iotg->otg.state = OTG_STATE_B_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_B_PERIPHERAL:
-		if (lnw->iotg.stop_peripheral)
-			lnw->iotg.stop_peripheral(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev,
-				"client driver has been removed.\n");
-		iotg->otg.state = OTG_STATE_B_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_B_WAIT_ACON:
-		/* delete hsm timer for b_ase0_brst_tmr */
-		del_timer_sync(&lnw->hsm_timer);
-
-		langwell_otg_HAAR(0);
-
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev, "host driver has been removed.\n");
-		iotg->hsm.b_bus_req = 0;
-		iotg->otg.state = OTG_STATE_B_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	default:
-		dev_dbg(lnw->dev, "error state before suspend\n");
-		break;
-	}
-
-	return ret;
-}
-
-static void transceiver_resume(struct pci_dev *pdev)
-{
-	pci_restore_state(pdev);
-	pci_set_power_state(pdev, PCI_D0);
-}
-
-static int langwell_otg_resume(struct pci_dev *pdev)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	int			ret = 0;
-
-	transceiver_resume(pdev);
-
-	lnw->qwork = create_singlethread_workqueue("langwell_otg_queue");
-	if (!lnw->qwork) {
-		dev_dbg(&pdev->dev, "cannot create langwell otg workqueuen");
-		ret = -ENOMEM;
-		goto error;
-	}
-
-	if (request_irq(pdev->irq, otg_irq, IRQF_SHARED,
-				driver_name, lnw) != 0) {
-		dev_dbg(&pdev->dev, "request interrupt %d failed\n", pdev->irq);
-		ret = -EBUSY;
-		goto error;
-	}
-
-	/* enable OTG interrupts */
-	langwell_otg_intr(1);
-
-	update_hsm();
-
-	langwell_update_transceiver();
-
-	return ret;
-error:
-	langwell_otg_intr(0);
-	transceiver_suspend(pdev);
-	return ret;
-}
-
-static int __init langwell_otg_init(void)
-{
-	return pci_register_driver(&otg_pci_driver);
-}
-module_init(langwell_otg_init);
-
-static void __exit langwell_otg_cleanup(void)
-{
-	pci_unregister_driver(&otg_pci_driver);
-}
-module_exit(langwell_otg_cleanup);
diff --git a/include/linux/usb/langwell_otg.h b/include/linux/usb/langwell_otg.h
deleted file mode 100644
index 51f17b16d312..000000000000
--- a/include/linux/usb/langwell_otg.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Intel Langwell USB OTG transceiver driver
- * Copyright (C) 2008 - 2010, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- */
-
-#ifndef __LANGWELL_OTG_H
-#define __LANGWELL_OTG_H
-
-#include <linux/usb/intel_mid_otg.h>
-
-#define CI_USBCMD		0x30
-#	define USBCMD_RST		BIT(1)
-#	define USBCMD_RS		BIT(0)
-#define CI_USBSTS		0x34
-#	define USBSTS_SLI		BIT(8)
-#	define USBSTS_URI		BIT(6)
-#	define USBSTS_PCI		BIT(2)
-#define CI_PORTSC1		0x74
-#	define PORTSC_PP		BIT(12)
-#	define PORTSC_LS		(BIT(11) | BIT(10))
-#	define PORTSC_SUSP		BIT(7)
-#	define PORTSC_CCS		BIT(0)
-#define CI_HOSTPC1		0xb4
-#	define HOSTPC1_PHCD		BIT(22)
-#define CI_OTGSC		0xf4
-#	define OTGSC_DPIE		BIT(30)
-#	define OTGSC_1MSE		BIT(29)
-#	define OTGSC_BSEIE		BIT(28)
-#	define OTGSC_BSVIE		BIT(27)
-#	define OTGSC_ASVIE		BIT(26)
-#	define OTGSC_AVVIE		BIT(25)
-#	define OTGSC_IDIE		BIT(24)
-#	define OTGSC_DPIS		BIT(22)
-#	define OTGSC_1MSS		BIT(21)
-#	define OTGSC_BSEIS		BIT(20)
-#	define OTGSC_BSVIS		BIT(19)
-#	define OTGSC_ASVIS		BIT(18)
-#	define OTGSC_AVVIS		BIT(17)
-#	define OTGSC_IDIS		BIT(16)
-#	define OTGSC_DPS		BIT(14)
-#	define OTGSC_1MST		BIT(13)
-#	define OTGSC_BSE		BIT(12)
-#	define OTGSC_BSV		BIT(11)
-#	define OTGSC_ASV		BIT(10)
-#	define OTGSC_AVV		BIT(9)
-#	define OTGSC_ID			BIT(8)
-#	define OTGSC_HABA		BIT(7)
-#	define OTGSC_HADP		BIT(6)
-#	define OTGSC_IDPU		BIT(5)
-#	define OTGSC_DP			BIT(4)
-#	define OTGSC_OT			BIT(3)
-#	define OTGSC_HAAR		BIT(2)
-#	define OTGSC_VC			BIT(1)
-#	define OTGSC_VD			BIT(0)
-#	define OTGSC_INTEN_MASK		(0x7f << 24)
-#	define OTGSC_INT_MASK		(0x5f << 24)
-#	define OTGSC_INTSTS_MASK	(0x7f << 16)
-#define CI_USBMODE		0xf8
-#	define USBMODE_CM		(BIT(1) | BIT(0))
-#	define USBMODE_IDLE		0
-#	define USBMODE_DEVICE		0x2
-#	define USBMODE_HOST		0x3
-#define USBCFG_ADDR			0xff10801c
-#define USBCFG_LEN			4
-#	define USBCFG_VBUSVAL		BIT(14)
-#	define USBCFG_AVALID		BIT(13)
-#	define USBCFG_BVALID		BIT(12)
-#	define USBCFG_SESEND		BIT(11)
-
-#define INTR_DUMMY_MASK (USBSTS_SLI | USBSTS_URI | USBSTS_PCI)
-
-enum langwell_otg_timer_type {
-	TA_WAIT_VRISE_TMR,
-	TA_WAIT_BCON_TMR,
-	TA_AIDL_BDIS_TMR,
-	TB_ASE0_BRST_TMR,
-	TB_SE0_SRP_TMR,
-	TB_SRP_INIT_TMR,
-	TB_SRP_FAIL_TMR,
-	TB_BUS_SUSPEND_TMR
-};
-
-#define TA_WAIT_VRISE	100
-#define TA_WAIT_BCON	30000
-#define TA_AIDL_BDIS	15000
-#define TB_ASE0_BRST	5000
-#define TB_SE0_SRP	2
-#define TB_SRP_INIT	100
-#define TB_SRP_FAIL	5500
-#define TB_BUS_SUSPEND	500
-
-struct langwell_otg_timer {
-	unsigned long expires;	/* Number of count increase to timeout */
-	unsigned long count;	/* Tick counter */
-	void (*function)(unsigned long);	/* Timeout function */
-	unsigned long data;	/* Data passed to function */
-	struct list_head list;
-};
-
-struct langwell_otg {
-	struct intel_mid_otg_xceiv	iotg;
-	struct device			*dev;
-
-	void __iomem			*usbcfg;	/* SCCBUSB config Reg */
-
-	unsigned			region;
-	unsigned			cfg_region;
-
-	struct work_struct		work;
-	struct workqueue_struct		*qwork;
-	struct timer_list		hsm_timer;
-
-	spinlock_t			lock;
-	spinlock_t			wq_lock;
-
-	struct notifier_block		iotg_notifier;
-};
-
-static inline
-struct langwell_otg *mid_xceiv_to_lnw(struct intel_mid_otg_xceiv *iotg)
-{
-	return container_of(iotg, struct langwell_otg, iotg);
-}
-
-#endif /* __LANGWELL_OTG_H__ */
-- 
cgit v1.2.3


From 1a5e29fc2b90daf71a60329c29a1886fd126169a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:02:51 -0800
Subject: kernel-doc: fix new warnings in device.h

Fix new kernel-doc warnings:

Warning(include/linux/device.h:299): No description found for parameter 'name'
Warning(include/linux/device.h:299): No description found for parameter 'subsys'
Warning(include/linux/device.h:299): No description found for parameter 'node'
Warning(include/linux/device.h:299): No description found for parameter 'add_dev'
Warning(include/linux/device.h:299): No description found for parameter 'remove_dev'
Warning(include/linux/device.h:685): No description found for parameter 'id'
Warning(include/linux/device.h:1009): No description found for parameter '__driver'
Warning(include/linux/device.h:1009): No description found for parameter '__register'
Warning(include/linux/device.h:1009): No description found for parameter '__unregister'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 5b3adb8f9588..b63fb393aa58 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -279,11 +279,11 @@ struct device *driver_find_device(struct device_driver *drv,
 
 /**
  * struct subsys_interface - interfaces to device functions
- * @name        name of the device function
- * @subsystem   subsytem of the devices to attach to
- * @node        the list of functions registered at the subsystem
- * @add         device hookup to device function handler
- * @remove      device hookup to device function handler
+ * @name:       name of the device function
+ * @subsys:     subsytem of the devices to attach to
+ * @node:       the list of functions registered at the subsystem
+ * @add_dev:    device hookup to device function handler
+ * @remove_dev: device hookup to device function handler
  *
  * Simple interfaces attached to a subsystem. Multiple interfaces can
  * attach to a subsystem and its devices. Unlike drivers, they do not
@@ -612,6 +612,7 @@ struct device_dma_parameters {
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
  * @devt:	For creating the sysfs "dev".
+ * @id:		device instance
  * @devres_lock: Spinlock to protect the resource of the device.
  * @devres_head: The resources list of the device.
  * @knode_class: The node used to add the device to the class list.
@@ -1003,6 +1004,10 @@ extern long sysfs_deprecated;
  * Each module may only use this macro once, and calling it replaces
  * module_init() and module_exit().
  *
+ * @__driver: driver name
+ * @__register: register function for this driver type
+ * @__unregister: unregister function for this driver type
+ *
  * Use this macro to construct bus specific macros for registering
  * drivers, and do not use it on its own.
  */
-- 
cgit v1.2.3


From 0fcd97789028e8ec286a4248c20a71eae239ba61 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:02:56 -0800
Subject: kernel-doc: fix new warning in usb.h

Fix new kernel-doc warning:

Warning(include/linux/usb.h:1251): No description found for parameter 'num_mapped_sgs'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 27a4e16d2bf1..69d845739bc2 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1073,6 +1073,7 @@ typedef void (*usb_complete_t)(struct urb *);
  *	which the host controller driver should use in preference to the
  *	transfer_buffer.
  * @sg: scatter gather buffer list
+ * @num_mapped_sgs: (internal) number of mapped sg entries
  * @num_sgs: number of entries in the sg list
  * @transfer_buffer_length: How big is transfer_buffer.  The transfer may
  *	be broken up into chunks according to the current maximum packet
-- 
cgit v1.2.3


From b82b9183d4f18f9b8c4bb31f223eb6c79b734eb0 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 24 Jan 2012 05:16:00 +0000
Subject: team: send only changed options/ports via netlink

This patch changes event message behaviour to send only updated records
instead of whole list. This fixes bug on which userspace receives non-actual
data in case multiple events occur in row.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 136 ++++++++++++++++++++++++++++++++----------------
 include/linux/if_team.h |  10 ++++
 2 files changed, 100 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index ed2a862b835d..6b678f38e5ce 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -92,9 +92,9 @@ struct team_option *__team_find_option(struct team *team, const char *opt_name)
 	return NULL;
 }
 
-int team_options_register(struct team *team,
-			  const struct team_option *option,
-			  size_t option_count)
+int __team_options_register(struct team *team,
+			    const struct team_option *option,
+			    size_t option_count)
 {
 	int i;
 	struct team_option **dst_opts;
@@ -116,8 +116,11 @@ int team_options_register(struct team *team,
 		}
 	}
 
-	for (i = 0; i < option_count; i++)
+	for (i = 0; i < option_count; i++) {
+		dst_opts[i]->changed = true;
+		dst_opts[i]->removed = false;
 		list_add_tail(&dst_opts[i]->list, &team->option_list);
+	}
 
 	kfree(dst_opts);
 	return 0;
@@ -130,10 +133,22 @@ rollback:
 	return err;
 }
 
-EXPORT_SYMBOL(team_options_register);
+static void __team_options_mark_removed(struct team *team,
+					const struct team_option *option,
+					size_t option_count)
+{
+	int i;
+
+	for (i = 0; i < option_count; i++, option++) {
+		struct team_option *del_opt;
 
-static void __team_options_change_check(struct team *team,
-					struct team_option *changed_option);
+		del_opt = __team_find_option(team, option->name);
+		if (del_opt) {
+			del_opt->changed = true;
+			del_opt->removed = true;
+		}
+	}
+}
 
 static void __team_options_unregister(struct team *team,
 				      const struct team_option *option,
@@ -152,12 +167,29 @@ static void __team_options_unregister(struct team *team,
 	}
 }
 
+static void __team_options_change_check(struct team *team);
+
+int team_options_register(struct team *team,
+			  const struct team_option *option,
+			  size_t option_count)
+{
+	int err;
+
+	err = __team_options_register(team, option, option_count);
+	if (err)
+		return err;
+	__team_options_change_check(team);
+	return 0;
+}
+EXPORT_SYMBOL(team_options_register);
+
 void team_options_unregister(struct team *team,
 			     const struct team_option *option,
 			     size_t option_count)
 {
+	__team_options_mark_removed(team, option, option_count);
+	__team_options_change_check(team);
 	__team_options_unregister(team, option, option_count);
-	__team_options_change_check(team, NULL);
 }
 EXPORT_SYMBOL(team_options_unregister);
 
@@ -176,7 +208,8 @@ static int team_option_set(struct team *team, struct team_option *option,
 	if (err)
 		return err;
 
-	__team_options_change_check(team, option);
+	option->changed = true;
+	__team_options_change_check(team);
 	return err;
 }
 
@@ -653,6 +686,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev)
 		return -ENOENT;
 	}
 
+	port->removed = true;
 	__team_port_change_check(port, false);
 	team_port_list_del_port(team, port);
 	team_adjust_ops(team);
@@ -1200,10 +1234,9 @@ err_fill:
 	return err;
 }
 
-static int team_nl_fill_options_get_changed(struct sk_buff *skb,
-					    u32 pid, u32 seq, int flags,
-					    struct team *team,
-					    struct team_option *changed_option)
+static int team_nl_fill_options_get(struct sk_buff *skb,
+				    u32 pid, u32 seq, int flags,
+				    struct team *team, bool fillall)
 {
 	struct nlattr *option_list;
 	void *hdr;
@@ -1223,12 +1256,19 @@ static int team_nl_fill_options_get_changed(struct sk_buff *skb,
 		struct nlattr *option_item;
 		long arg;
 
+		/* Include only changed options if fill all mode is not on */
+		if (!fillall && !option->changed)
+			continue;
 		option_item = nla_nest_start(skb, TEAM_ATTR_ITEM_OPTION);
 		if (!option_item)
 			goto nla_put_failure;
 		NLA_PUT_STRING(skb, TEAM_ATTR_OPTION_NAME, option->name);
-		if (option == changed_option)
+		if (option->changed) {
 			NLA_PUT_FLAG(skb, TEAM_ATTR_OPTION_CHANGED);
+			option->changed = false;
+		}
+		if (option->removed)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_OPTION_REMOVED);
 		switch (option->type) {
 		case TEAM_OPTION_TYPE_U32:
 			NLA_PUT_U8(skb, TEAM_ATTR_OPTION_TYPE, NLA_U32);
@@ -1255,13 +1295,13 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int team_nl_fill_options_get(struct sk_buff *skb,
-				    struct genl_info *info, int flags,
-				    struct team *team)
+static int team_nl_fill_options_get_all(struct sk_buff *skb,
+					struct genl_info *info, int flags,
+					struct team *team)
 {
-	return team_nl_fill_options_get_changed(skb, info->snd_pid,
-						info->snd_seq, NLM_F_ACK,
-						team, NULL);
+	return team_nl_fill_options_get(skb, info->snd_pid,
+					info->snd_seq, NLM_F_ACK,
+					team, true);
 }
 
 static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info)
@@ -1273,7 +1313,7 @@ static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info)
 	if (!team)
 		return -EINVAL;
 
-	err = team_nl_send_generic(info, team, team_nl_fill_options_get);
+	err = team_nl_send_generic(info, team, team_nl_fill_options_get_all);
 
 	team_nl_team_put(team);
 
@@ -1365,10 +1405,10 @@ team_put:
 	return err;
 }
 
-static int team_nl_fill_port_list_get_changed(struct sk_buff *skb,
-					      u32 pid, u32 seq, int flags,
-					      struct team *team,
-					      struct team_port *changed_port)
+static int team_nl_fill_port_list_get(struct sk_buff *skb,
+				      u32 pid, u32 seq, int flags,
+				      struct team *team,
+				      bool fillall)
 {
 	struct nlattr *port_list;
 	void *hdr;
@@ -1387,12 +1427,19 @@ static int team_nl_fill_port_list_get_changed(struct sk_buff *skb,
 	list_for_each_entry(port, &team->port_list, list) {
 		struct nlattr *port_item;
 
+		/* Include only changed ports if fill all mode is not on */
+		if (!fillall && !port->changed)
+			continue;
 		port_item = nla_nest_start(skb, TEAM_ATTR_ITEM_PORT);
 		if (!port_item)
 			goto nla_put_failure;
 		NLA_PUT_U32(skb, TEAM_ATTR_PORT_IFINDEX, port->dev->ifindex);
-		if (port == changed_port)
+		if (port->changed) {
 			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_CHANGED);
+			port->changed = false;
+		}
+		if (port->removed)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_REMOVED);
 		if (port->linkup)
 			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_LINKUP);
 		NLA_PUT_U32(skb, TEAM_ATTR_PORT_SPEED, port->speed);
@@ -1408,13 +1455,13 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int team_nl_fill_port_list_get(struct sk_buff *skb,
-				      struct genl_info *info, int flags,
-				      struct team *team)
+static int team_nl_fill_port_list_get_all(struct sk_buff *skb,
+					  struct genl_info *info, int flags,
+					  struct team *team)
 {
-	return team_nl_fill_port_list_get_changed(skb, info->snd_pid,
-						  info->snd_seq, NLM_F_ACK,
-						  team, NULL);
+	return team_nl_fill_port_list_get(skb, info->snd_pid,
+					  info->snd_seq, NLM_F_ACK,
+					  team, true);
 }
 
 static int team_nl_cmd_port_list_get(struct sk_buff *skb,
@@ -1427,7 +1474,7 @@ static int team_nl_cmd_port_list_get(struct sk_buff *skb,
 	if (!team)
 		return -EINVAL;
 
-	err = team_nl_send_generic(info, team, team_nl_fill_port_list_get);
+	err = team_nl_send_generic(info, team, team_nl_fill_port_list_get_all);
 
 	team_nl_team_put(team);
 
@@ -1464,8 +1511,7 @@ static struct genl_multicast_group team_change_event_mcgrp = {
 	.name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME,
 };
 
-static int team_nl_send_event_options_get(struct team *team,
-					  struct team_option *changed_option)
+static int team_nl_send_event_options_get(struct team *team)
 {
 	struct sk_buff *skb;
 	int err;
@@ -1475,8 +1521,7 @@ static int team_nl_send_event_options_get(struct team *team,
 	if (!skb)
 		return -ENOMEM;
 
-	err = team_nl_fill_options_get_changed(skb, 0, 0, 0, team,
-					       changed_option);
+	err = team_nl_fill_options_get(skb, 0, 0, 0, team, false);
 	if (err < 0)
 		goto err_fill;
 
@@ -1489,18 +1534,17 @@ err_fill:
 	return err;
 }
 
-static int team_nl_send_event_port_list_get(struct team_port *port)
+static int team_nl_send_event_port_list_get(struct team *team)
 {
 	struct sk_buff *skb;
 	int err;
-	struct net *net = dev_net(port->team->dev);
+	struct net *net = dev_net(team->dev);
 
 	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
-	err = team_nl_fill_port_list_get_changed(skb, 0, 0, 0,
-						 port->team, port);
+	err = team_nl_fill_port_list_get(skb, 0, 0, 0, team, false);
 	if (err < 0)
 		goto err_fill;
 
@@ -1544,12 +1588,11 @@ static void team_nl_fini(void)
  * Change checkers
  ******************/
 
-static void __team_options_change_check(struct team *team,
-					struct team_option *changed_option)
+static void __team_options_change_check(struct team *team)
 {
 	int err;
 
-	err = team_nl_send_event_options_get(team, changed_option);
+	err = team_nl_send_event_options_get(team);
 	if (err)
 		netdev_warn(team->dev, "Failed to send options change via netlink\n");
 }
@@ -1559,9 +1602,10 @@ static void __team_port_change_check(struct team_port *port, bool linkup)
 {
 	int err;
 
-	if (port->linkup == linkup)
+	if (!port->removed && port->linkup == linkup)
 		return;
 
+	port->changed = true;
 	port->linkup = linkup;
 	if (linkup) {
 		struct ethtool_cmd ecmd;
@@ -1577,7 +1621,7 @@ static void __team_port_change_check(struct team_port *port, bool linkup)
 	port->duplex = 0;
 
 send_event:
-	err = team_nl_send_event_port_list_get(port);
+	err = team_nl_send_event_port_list_get(port->team);
 	if (err)
 		netdev_warn(port->team->dev, "Failed to send port change of device %s via netlink\n",
 			    port->dev->name);
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 828181fbad5d..58404b0c5010 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -46,6 +46,10 @@ struct team_port {
 	u32 speed;
 	u8 duplex;
 
+	/* Custom gennetlink interface related flags */
+	bool changed;
+	bool removed;
+
 	struct rcu_head rcu;
 };
 
@@ -72,6 +76,10 @@ struct team_option {
 	enum team_option_type type;
 	int (*getter)(struct team *team, void *arg);
 	int (*setter)(struct team *team, void *arg);
+
+	/* Custom gennetlink interface related flags */
+	bool changed;
+	bool removed;
 };
 
 struct team_mode {
@@ -207,6 +215,7 @@ enum {
 	TEAM_ATTR_OPTION_CHANGED,	/* flag */
 	TEAM_ATTR_OPTION_TYPE,		/* u8 */
 	TEAM_ATTR_OPTION_DATA,		/* dynamic */
+	TEAM_ATTR_OPTION_REMOVED,	/* flag */
 
 	__TEAM_ATTR_OPTION_MAX,
 	TEAM_ATTR_OPTION_MAX = __TEAM_ATTR_OPTION_MAX - 1,
@@ -227,6 +236,7 @@ enum {
 	TEAM_ATTR_PORT_LINKUP,		/* flag */
 	TEAM_ATTR_PORT_SPEED,		/* u32 */
 	TEAM_ATTR_PORT_DUPLEX,		/* u8 */
+	TEAM_ATTR_PORT_REMOVED,		/* flag */
 
 	__TEAM_ATTR_PORT_MAX,
 	TEAM_ATTR_PORT_MAX = __TEAM_ATTR_PORT_MAX - 1,
-- 
cgit v1.2.3


From e050e3f0a71bf7dc2c148b35caff0234decc8198 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 26 Jan 2012 17:03:19 +0100
Subject: perf: Fix broken interrupt rate throttling

This patch fixes the sampling interrupt throttling mechanism.

It was broken in v3.2. Events were not being unthrottled. The
unthrottling mechanism required that events be checked at each
timer tick.

This patch solves this problem and also separates:

  - unthrottling
  - multiplexing
  - frequency-mode period adjustments

Not all of them need to be executed at each timer tick.

This third version of the patch is based on my original patch +
PeterZ proposal (https://lkml.org/lkml/2012/1/7/87).

At each timer tick, for each context:

  - if the current CPU has throttled events, we unthrottle events

  - if context has frequency-based events, we adjust sampling periods

  - if we have reached the jiffies interval, we multiplex (rotate)

We decoupled rotation (multiplexing) from frequency-mode sampling
period adjustments.  They should not necessarily happen at the same
rate. Multiplexing is subject to jiffies_interval (currently at 1
but could be higher once the tunable is exposed via sysfs).

We have grouped frequency-mode adjustment and unthrottling into the
same routine to minimize code duplication. When throttled while in
frequency mode, we scan the events only once.

We have fixed the threshold enforcement code in __perf_event_overflow().
There was a bug whereby it would allow more than the authorized rate
because an increment of hwc->interrupts was not executed at the right
place.

The patch was tested with low sampling limit (2000) and fixed periods,
frequency mode, overcommitted PMU.

On a 2.1GHz AMD CPU:

 $ cat /proc/sys/kernel/perf_event_max_sample_rate
 2000

We set a rate of 3000 samples/sec (2.1GHz/3000 = 700000):

 $ perf record -e cycles,cycles -c 700000  noploop 10
 $ perf report -D | tail -21

 Aggregated stats:
           TOTAL events:      80086
            MMAP events:         88
            COMM events:          2
            EXIT events:          4
        THROTTLE events:      19996
      UNTHROTTLE events:      19996
          SAMPLE events:      40000

 cycles stats:
           TOTAL events:      40006
            MMAP events:          5
            COMM events:          1
            EXIT events:          4
        THROTTLE events:       9998
      UNTHROTTLE events:       9998
          SAMPLE events:      20000

 cycles stats:
           TOTAL events:      39996
        THROTTLE events:       9998
      UNTHROTTLE events:       9998
          SAMPLE events:      20000

For 10s, the cap is 2x2000x10 = 40000 samples.
We get exactly that: 20000 samples/event.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: <stable@kernel.org> # v3.2+
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120126160319.GA5655@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |   1 +
 kernel/events/core.c       | 104 ++++++++++++++++++++++++++++-----------------
 2 files changed, 67 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 08855613ceb3..abb2776be1ba 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -587,6 +587,7 @@ struct hw_perf_event {
 	u64				sample_period;
 	u64				last_period;
 	local64_t			period_left;
+	u64                             interrupts_seq;
 	u64				interrupts;
 
 	u64				freq_time_stamp;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 32b48c889711..ba36013cfb21 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2300,6 +2300,9 @@ do {					\
 	return div64_u64(dividend, divisor);
 }
 
+static DEFINE_PER_CPU(int, perf_throttled_count);
+static DEFINE_PER_CPU(u64, perf_throttled_seq);
+
 static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -2325,16 +2328,29 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 	}
 }
 
-static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
+/*
+ * combine freq adjustment with unthrottling to avoid two passes over the
+ * events. At the same time, make sure, having freq events does not change
+ * the rate of unthrottling as that would introduce bias.
+ */
+static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
+					   int needs_unthr)
 {
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	u64 interrupts, now;
+	u64 now, period = TICK_NSEC;
 	s64 delta;
 
-	if (!ctx->nr_freq)
+	/*
+	 * only need to iterate over all events iff:
+	 * - context have events in frequency mode (needs freq adjust)
+	 * - there are events to unthrottle on this cpu
+	 */
+	if (!(ctx->nr_freq || needs_unthr))
 		return;
 
+	raw_spin_lock(&ctx->lock);
+
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (event->state != PERF_EVENT_STATE_ACTIVE)
 			continue;
@@ -2344,13 +2360,8 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 
 		hwc = &event->hw;
 
-		interrupts = hwc->interrupts;
-		hwc->interrupts = 0;
-
-		/*
-		 * unthrottle events on the tick
-		 */
-		if (interrupts == MAX_INTERRUPTS) {
+		if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) {
+			hwc->interrupts = 0;
 			perf_log_throttle(event, 1);
 			event->pmu->start(event, 0);
 		}
@@ -2358,14 +2369,26 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 		if (!event->attr.freq || !event->attr.sample_freq)
 			continue;
 
-		event->pmu->read(event);
+		/*
+		 * stop the event and update event->count
+		 */
+		event->pmu->stop(event, PERF_EF_UPDATE);
+
 		now = local64_read(&event->count);
 		delta = now - hwc->freq_count_stamp;
 		hwc->freq_count_stamp = now;
 
+		/*
+		 * restart the event
+		 * reload only if value has changed
+		 */
 		if (delta > 0)
 			perf_adjust_period(event, period, delta);
+
+		event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
 	}
+
+	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -2388,16 +2411,13 @@ static void rotate_ctx(struct perf_event_context *ctx)
  */
 static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
-	u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
 	struct perf_event_context *ctx = NULL;
-	int rotate = 0, remove = 1, freq = 0;
+	int rotate = 0, remove = 1;
 
 	if (cpuctx->ctx.nr_events) {
 		remove = 0;
 		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
 			rotate = 1;
-		if (cpuctx->ctx.nr_freq)
-			freq = 1;
 	}
 
 	ctx = cpuctx->task_ctx;
@@ -2405,37 +2425,26 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 		remove = 0;
 		if (ctx->nr_events != ctx->nr_active)
 			rotate = 1;
-		if (ctx->nr_freq)
-			freq = 1;
 	}
 
-	if (!rotate && !freq)
+	if (!rotate)
 		goto done;
 
 	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 	perf_pmu_disable(cpuctx->ctx.pmu);
 
-	if (freq) {
-		perf_ctx_adjust_freq(&cpuctx->ctx, interval);
-		if (ctx)
-			perf_ctx_adjust_freq(ctx, interval);
-	}
-
-	if (rotate) {
-		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-		if (ctx)
-			ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+	if (ctx)
+		ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
 
-		rotate_ctx(&cpuctx->ctx);
-		if (ctx)
-			rotate_ctx(ctx);
+	rotate_ctx(&cpuctx->ctx);
+	if (ctx)
+		rotate_ctx(ctx);
 
-		perf_event_sched_in(cpuctx, ctx, current);
-	}
+	perf_event_sched_in(cpuctx, ctx, current);
 
 	perf_pmu_enable(cpuctx->ctx.pmu);
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-
 done:
 	if (remove)
 		list_del_init(&cpuctx->rotation_list);
@@ -2445,10 +2454,22 @@ void perf_event_task_tick(void)
 {
 	struct list_head *head = &__get_cpu_var(rotation_list);
 	struct perf_cpu_context *cpuctx, *tmp;
+	struct perf_event_context *ctx;
+	int throttled;
 
 	WARN_ON(!irqs_disabled());
 
+	__this_cpu_inc(perf_throttled_seq);
+	throttled = __this_cpu_xchg(perf_throttled_count, 0);
+
 	list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
+		ctx = &cpuctx->ctx;
+		perf_adjust_freq_unthr_context(ctx, throttled);
+
+		ctx = cpuctx->task_ctx;
+		if (ctx)
+			perf_adjust_freq_unthr_context(ctx, throttled);
+
 		if (cpuctx->jiffies_interval == 1 ||
 				!(jiffies % cpuctx->jiffies_interval))
 			perf_rotate_context(cpuctx);
@@ -4509,6 +4530,7 @@ static int __perf_event_overflow(struct perf_event *event,
 {
 	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
+	u64 seq;
 	int ret = 0;
 
 	/*
@@ -4518,14 +4540,20 @@ static int __perf_event_overflow(struct perf_event *event,
 	if (unlikely(!is_sampling_event(event)))
 		return 0;
 
-	if (unlikely(hwc->interrupts >= max_samples_per_tick)) {
-		if (throttle) {
+	seq = __this_cpu_read(perf_throttled_seq);
+	if (seq != hwc->interrupts_seq) {
+		hwc->interrupts_seq = seq;
+		hwc->interrupts = 1;
+	} else {
+		hwc->interrupts++;
+		if (unlikely(throttle
+			     && hwc->interrupts >= max_samples_per_tick)) {
+			__this_cpu_inc(perf_throttled_count);
 			hwc->interrupts = MAX_INTERRUPTS;
 			perf_log_throttle(event, 0);
 			ret = 1;
 		}
-	} else
-		hwc->interrupts++;
+	}
 
 	if (event->attr.freq) {
 		u64 now = perf_clock();
-- 
cgit v1.2.3


From 181e9bdef37bfcaa41f3ab6c948a2a0d60a268b5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 29 Jan 2012 20:35:52 +0100
Subject: PM / Hibernate: Fix s2disk regression related to freezing workqueues

Commit 2aede851ddf08666f68ffc17be446420e9d2a056

  PM / Hibernate: Freeze kernel threads after preallocating memory

introduced a mechanism by which kernel threads were frozen after
the preallocation of hibernate image memory to avoid problems with
frozen kernel threads not responding to memory freeing requests.
However, it overlooked the s2disk code path in which the
SNAPSHOT_CREATE_IMAGE ioctl was run directly after SNAPSHOT_FREE,
which caused freeze_workqueues_begin() to BUG(), because it saw
that workqueues had already been frozen.

Although in principle this issue might be addressed by removing
the relevant BUG_ON() from freeze_workqueues_begin(), that would
reintroduce the very problem that commit 2aede851ddf08666f68ffc17be4
attempted to avoid into that particular code path.  For this reason,
to fix the issue at hand, introduce thaw_kernel_threads() and make
the SNAPSHOT_FREE ioctl execute it.

Special thanks to Srivatsa S. Bhat for detailed analysis of the
problem.

Reported-and-tested-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: stable@kernel.org
---
 include/linux/freezer.h |  2 ++
 kernel/power/process.c  | 19 +++++++++++++++++++
 kernel/power/user.c     |  9 +++++++++
 3 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 0ab54e16a91f..d09af4b67cf1 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -39,6 +39,7 @@ extern bool __refrigerator(bool check_kthr_stop);
 extern int freeze_processes(void);
 extern int freeze_kernel_threads(void);
 extern void thaw_processes(void);
+extern void thaw_kernel_threads(void);
 
 static inline bool try_to_freeze(void)
 {
@@ -174,6 +175,7 @@ static inline bool __refrigerator(bool check_kthr_stop) { return false; }
 static inline int freeze_processes(void) { return -ENOSYS; }
 static inline int freeze_kernel_threads(void) { return -ENOSYS; }
 static inline void thaw_processes(void) {}
+static inline void thaw_kernel_threads(void) {}
 
 static inline bool try_to_freeze(void) { return false; }
 
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 77274c9ba2f1..eeca00311f39 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -188,3 +188,22 @@ void thaw_processes(void)
 	printk("done.\n");
 }
 
+void thaw_kernel_threads(void)
+{
+	struct task_struct *g, *p;
+
+	pm_nosig_freezing = false;
+	printk("Restarting kernel threads ... ");
+
+	thaw_workqueues();
+
+	read_lock(&tasklist_lock);
+	do_each_thread(g, p) {
+		if (p->flags & (PF_KTHREAD | PF_WQ_WORKER))
+			__thaw_task(p);
+	} while_each_thread(g, p);
+	read_unlock(&tasklist_lock);
+
+	schedule();
+	printk("done.\n");
+}
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 6b1ab7a88522..e5a21a857302 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -274,6 +274,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		swsusp_free();
 		memset(&data->handle, 0, sizeof(struct snapshot_handle));
 		data->ready = 0;
+		/*
+		 * It is necessary to thaw kernel threads here, because
+		 * SNAPSHOT_CREATE_IMAGE may be invoked directly after
+		 * SNAPSHOT_FREE.  In that case, if kernel threads were not
+		 * thawed, the preallocation of memory carried out by
+		 * hibernation_snapshot() might run into problems (i.e. it
+		 * might fail or even deadlock).
+		 */
+		thaw_kernel_threads();
 		break;
 
 	case SNAPSHOT_PREF_IMAGE_SIZE:
-- 
cgit v1.2.3


From 1a30871fe635d3e92972e6b93e39ff65bb57e52d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Mon, 16 Jan 2012 11:07:16 +0200
Subject: mtd: fix MTD suspend

Commits 3fe4bae88460869a8e553397cd9057a4ee7ca341 and
079c985e7a6f4ce60f931cebfdd5ee3c3 broke MTD suspend in 2 ways:

1. When the '->suspend' method is not present, we return -EOPNOTSUPP, but
   the callers of 'mtd_suspend()' expect 0 instead.
2. Checking of the 'mtd' parameter against NULL has been incorrectly removed
   in 'mtd_cls_suspend()'.

This patch fixes the breakages. This has been found, analyzed, reported
and tested by Rafael J. Wysocki <rjw@sisk.pl>.

Note, this patch is not needed in the stable tree because it fixes a
regression introduced during the v3.3 merge window.

Reported-by: Rafael J. Wysocki <rjw@sisk.pl>
Tested-by: Rafael J. Wysocki <rjw@sisk.pl>
Tested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdcore.c   | 2 +-
 include/linux/mtd/mtd.h | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 6ae9ca01388b..9a9ce71a71fc 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -119,7 +119,7 @@ static int mtd_cls_suspend(struct device *dev, pm_message_t state)
 {
 	struct mtd_info *mtd = dev_get_drvdata(dev);
 
-	return mtd_suspend(mtd);
+	return mtd ? mtd_suspend(mtd) : 0;
 }
 
 static int mtd_cls_resume(struct device *dev)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 1a81fde8f333..d8c7aad7331c 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -427,9 +427,7 @@ static inline int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 
 static inline int mtd_suspend(struct mtd_info *mtd)
 {
-	if (!mtd->suspend)
-		return -EOPNOTSUPP;
-	return mtd->suspend(mtd);
+	return mtd->suspend ? mtd->suspend(mtd) : 0;
 }
 
 static inline void mtd_resume(struct mtd_info *mtd)
-- 
cgit v1.2.3


From 3cccd1543ab623a5065335bf08350e06ffc788ab Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Date: Thu, 26 Jan 2012 19:13:16 +0200
Subject: lib/mpi: replaced MPI_NULL with normal NULL

MPI_NULL is replaced with normal NULL.

Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Reviewed-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/mpi.h | 2 --
 lib/mpi/mpicoder.c  | 8 ++++----
 lib/mpi/mpiutil.c   | 2 +-
 3 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mpi.h b/include/linux/mpi.h
index 06f88994ccaa..d02cca6cc8ce 100644
--- a/include/linux/mpi.h
+++ b/include/linux/mpi.h
@@ -57,8 +57,6 @@ struct gcry_mpi {
 
 typedef struct gcry_mpi *MPI;
 
-#define MPI_NULL NULL
-
 #define mpi_get_nlimbs(a)     ((a)->nlimbs)
 #define mpi_is_neg(a)	      ((a)->sign)
 
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 6116fc4990da..d7684aa7f65c 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -34,7 +34,7 @@ MPI do_encode_md(const void *sha_buffer, unsigned nbits)
 	uint8_t *frame, *fr_pt;
 	int i = 0, n;
 	size_t asnlen = DIM(asn);
-	MPI a = MPI_NULL;
+	MPI a = NULL;
 
 	if (SHA1_DIGEST_LENGTH + asnlen + 4 > nframe)
 		pr_info("MPI: can't encode a %d bit MD into a %d bits frame\n",
@@ -48,7 +48,7 @@ MPI do_encode_md(const void *sha_buffer, unsigned nbits)
 	 */
 	frame = kmalloc(nframe, GFP_KERNEL);
 	if (!frame)
-		return MPI_NULL;
+		return NULL;
 	n = 0;
 	frame[n++] = 0;
 	frame[n++] = 1;		/* block type */
@@ -92,7 +92,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
 	int i, j;
 	unsigned nbits, nbytes, nlimbs, nread = 0;
 	mpi_limb_t a;
-	MPI val = MPI_NULL;
+	MPI val = NULL;
 
 	if (*ret_nread < 2)
 		goto leave;
@@ -109,7 +109,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
 	nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
 	val = mpi_alloc(nlimbs);
 	if (!val)
-		return MPI_NULL;
+		return NULL;
 	i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
 	i %= BYTES_PER_MPI_LIMB;
 	val->nbits = nbits;
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
index eefc55d6b7f5..6bfc41f62b8f 100644
--- a/lib/mpi/mpiutil.c
+++ b/lib/mpi/mpiutil.c
@@ -135,7 +135,7 @@ int mpi_copy(MPI *copied, const MPI a)
 	size_t i;
 	MPI b;
 
-	*copied = MPI_NULL;
+	*copied = NULL;
 
 	if (a) {
 		b = mpi_alloc(a->nlimbs);
-- 
cgit v1.2.3


From e9c8d7a03e69093e4c33c5056a45c1233a42e8a4 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 18 Jan 2012 10:14:25 +0100
Subject: dma: sh_dma: not all SH DMAC implementations support MEMCPY

Add a flag to allow platforms to specify whether a DMAC instance supports
the MEMCPY operation. To avoid regressions, preserve the current default.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/shdma.c    | 3 ++-
 include/linux/sh_dma.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 54043cd831c8..812fd76e9c18 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -1262,7 +1262,8 @@ static int __init sh_dmae_probe(struct platform_device *pdev)
 
 	INIT_LIST_HEAD(&shdev->common.channels);
 
-	dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
+	if (!pdata->slave_only)
+		dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
 	if (pdata->slave && pdata->slave_num)
 		dma_cap_set(DMA_SLAVE, shdev->common.cap_mask);
 
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
index 8cd7fe59cf1a..425450b980b8 100644
--- a/include/linux/sh_dma.h
+++ b/include/linux/sh_dma.h
@@ -70,6 +70,7 @@ struct sh_dmae_pdata {
 	unsigned int needs_tend_set:1;
 	unsigned int no_dmars:1;
 	unsigned int chclr_present:1;
+	unsigned int slave_only:1;
 };
 
 /* DMA register */
-- 
cgit v1.2.3


From b18db3d91234c03ad080d317878c7c77672ba326 Mon Sep 17 00:00:00 2001
From: Heiko Stübner <heiko@sntech.de>
Date: Wed, 1 Feb 2012 09:12:24 -0800
Subject: Input: gpio_keys - fix struct device declared inside parameter list

A struct device parameter is used in the enable and disable callbacks to
distinguish between different gpio_keys devices.

Platforms that don't use these callbacks may not include struct device
at all, as seen on arch/arm/mach-s3c2410/mach-n30.c

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/gpio_keys.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h
index b5ca4b2c08ec..004ff33ab38e 100644
--- a/include/linux/gpio_keys.h
+++ b/include/linux/gpio_keys.h
@@ -1,6 +1,8 @@
 #ifndef _GPIO_KEYS_H
 #define _GPIO_KEYS_H
 
+struct device;
+
 struct gpio_keys_button {
 	/* Configuration parameters */
 	unsigned int code;	/* input event code (KEY_*, SW_*) */
-- 
cgit v1.2.3


From 7d731019218e49a9811f6d0adec4b1cfcb752bed Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 1 Feb 2012 11:10:24 -0800
Subject: mtd: fix merge conflict resolution breakage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes merge conflict resolution breakage introduced by merge
d3712b9dfcf4 ("Merge tag 'for-linus' of git://github.com/prasad-joshi/logfs_upstream").

The commit changed 'mtd_can_have_bb()' function and made it always
return zero, which is incorrect.  Instead, we need it to return whether
the underlying flash device can have bad eraseblocks or not.  UBI needs
this information because it affects how it handles the underlying flash.
E.g., if the underlying flash is NOR, it cannot have bad blocks and any
write or erase error is fatal, and all we can do is to switch to R/O
mode.  We do not need to reserve a pool of good eraseblocks for bad
eraseblocks handling, and so on.

This patch also removes 'mtd_can_have_bb()' invocations from Logfs to
ensure correct Logfs behavior.

I've tested that with this patch UBI works on top of NOR and NAND
flashes emulated by mtdram and nandsim correspondingly.

This patch is based on patch from Linus Torvalds.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Acked-by: Jörn Engel <joern@logfs.org>
Acked-by: Prasad Joshi <prasadjoshi.linux@gmail.com>
Acked-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/logfs/dev_mtd.c      | 6 ------
 include/linux/mtd/mtd.h | 2 +-
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index e97404d611e0..9c501449450d 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -152,9 +152,6 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs)
 	filler_t *filler = logfs_mtd_readpage;
 	struct mtd_info *mtd = super->s_mtd;
 
-	if (!mtd_can_have_bb(mtd))
-		return NULL;
-
 	*ofs = 0;
 	while (mtd_block_isbad(mtd, *ofs)) {
 		*ofs += mtd->erasesize;
@@ -172,9 +169,6 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs)
 	filler_t *filler = logfs_mtd_readpage;
 	struct mtd_info *mtd = super->s_mtd;
 
-	if (!mtd_can_have_bb(mtd))
-		return NULL;
-
 	*ofs = mtd->size - mtd->erasesize;
 	while (mtd_block_isbad(mtd, *ofs)) {
 		*ofs -= mtd->erasesize;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 221295208fd0..887ebe318c75 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -489,7 +489,7 @@ static inline int mtd_has_oob(const struct mtd_info *mtd)
 
 static inline int mtd_can_have_bb(const struct mtd_info *mtd)
 {
-	return 0;
+	return !!mtd->block_isbad;
 }
 
 	/* Kernel-side ioctl definitions */
-- 
cgit v1.2.3


From 8cdb878dcb359fd1137e9abdee9322f5e9bcfdf8 Mon Sep 17 00:00:00 2001
From: Christopher Yeoh <cyeoh@au1.ibm.com>
Date: Thu, 2 Feb 2012 11:34:09 +1030
Subject: Fix race in process_vm_rw_core

This fixes the race in process_vm_rw_core found by Oleg (see

  http://article.gmane.org/gmane.linux.kernel/1235667/

for details).

This has been updated since I last sent it as the creation of the new
mm_access() function did almost exactly the same thing as parts of the
previous version of this patch did.

In order to use mm_access() even when /proc isn't enabled, we move it to
kernel/fork.c where other related process mm access functions already
are.

Signed-off-by: Chris Yeoh <yeohc@au1.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c         | 20 --------------------
 include/linux/sched.h  |  6 ++++++
 kernel/fork.c          | 20 ++++++++++++++++++++
 mm/process_vm_access.c | 23 +++++++++--------------
 4 files changed, 35 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index d9512bd03e6c..d4548dd49b02 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -198,26 +198,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
 	return result;
 }
 
-static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
-{
-	struct mm_struct *mm;
-	int err;
-
-	err =  mutex_lock_killable(&task->signal->cred_guard_mutex);
-	if (err)
-		return ERR_PTR(err);
-
-	mm = get_task_mm(task);
-	if (mm && mm != current->mm &&
-			!ptrace_may_access(task, mode)) {
-		mmput(mm);
-		mm = ERR_PTR(-EACCES);
-	}
-	mutex_unlock(&task->signal->cred_guard_mutex);
-
-	return mm;
-}
-
 struct mm_struct *mm_for_maps(struct task_struct *task)
 {
 	return mm_access(task, PTRACE_MODE_READ);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2234985a5e65..7d379a6bfd88 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2259,6 +2259,12 @@ static inline void mmdrop(struct mm_struct * mm)
 extern void mmput(struct mm_struct *);
 /* Grab a reference to a task's mm, if it is not already going away */
 extern struct mm_struct *get_task_mm(struct task_struct *task);
+/*
+ * Grab a reference to a task's mm, if it is not already going away
+ * and ptrace_may_access with the mode parameter passed to it
+ * succeeds.
+ */
+extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
 /* Remove the current tasks stale references to the old mm_struct */
 extern void mm_release(struct task_struct *, struct mm_struct *);
 /* Allocate a new mm structure and copy contents from tsk->mm */
diff --git a/kernel/fork.c b/kernel/fork.c
index 051f090d40c1..1b2ef3c23ae4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -647,6 +647,26 @@ struct mm_struct *get_task_mm(struct task_struct *task)
 }
 EXPORT_SYMBOL_GPL(get_task_mm);
 
+struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
+{
+	struct mm_struct *mm;
+	int err;
+
+	err =  mutex_lock_killable(&task->signal->cred_guard_mutex);
+	if (err)
+		return ERR_PTR(err);
+
+	mm = get_task_mm(task);
+	if (mm && mm != current->mm &&
+			!ptrace_may_access(task, mode)) {
+		mmput(mm);
+		mm = ERR_PTR(-EACCES);
+	}
+	mutex_unlock(&task->signal->cred_guard_mutex);
+
+	return mm;
+}
+
 /* Please note the differences between mmput and mm_release.
  * mmput is called whenever we stop holding onto a mm_struct,
  * error success whatever.
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index e920aa3ce104..c20ff48994c2 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -298,23 +298,18 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
 		goto free_proc_pages;
 	}
 
-	task_lock(task);
-	if (__ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
-		task_unlock(task);
-		rc = -EPERM;
-		goto put_task_struct;
-	}
-	mm = task->mm;
-
-	if (!mm || (task->flags & PF_KTHREAD)) {
-		task_unlock(task);
-		rc = -EINVAL;
+	mm = mm_access(task, PTRACE_MODE_ATTACH);
+	if (!mm || IS_ERR(mm)) {
+		rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+		/*
+		 * Explicitly map EACCES to EPERM as EPERM is a more
+		 * appropriate error code for process_vm_readv/writev
+		 */
+		if (rc == -EACCES)
+			rc = -EPERM;
 		goto put_task_struct;
 	}
 
-	atomic_inc(&mm->mm_users);
-	task_unlock(task);
-
 	for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) {
 		rc = process_vm_rw_single_vec(
 			(unsigned long)rvec[i].iov_base, rvec[i].iov_len,
-- 
cgit v1.2.3


From ff05f603c3238010769787f3ba54c48c290ed3e5 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Thu, 2 Feb 2012 15:29:08 -0800
Subject: include/linux/lp8727.h: Remove executable bit

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lp8727.h | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 include/linux/lp8727.h

(limited to 'include/linux')

diff --git a/include/linux/lp8727.h b/include/linux/lp8727.h
old mode 100755
new mode 100644
-- 
cgit v1.2.3


From d020283dc694c9ec31b410f522252f7a8397e67d Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venki@google.com>
Date: Fri, 3 Feb 2012 22:22:25 +0100
Subject: PM / QoS: CPU C-state breakage with PM Qos change

Looks like change "PM QoS: Move and rename the implementation files"
merged during the 3.2 development cycle made PM QoS depend on
CONFIG_PM which depends on (PM_SLEEP || PM_RUNTIME).

That breaks CPU C-states with kernels not having these CONFIGs, causing CPUs
to spend time in Polling loop idle instead of going into deep C-states,
consuming way way more power. This is with either acpi idle or intel idle
enabled.

Either CONFIG_PM should be enabled with any pm_qos users or
the !CONFIG_PM pm_qos_request() should return sane defaults not to break
the existing users. Here is the patch for the latter option.

[rjw: Modified the changelog slightly.]

Signed-off-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: stable@vger.kernel.org
---
 include/linux/pm_qos.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index e5bbcbaa6f57..4d99e4e6ef83 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -110,7 +110,19 @@ static inline void pm_qos_remove_request(struct pm_qos_request *req)
 			{ return; }
 
 static inline int pm_qos_request(int pm_qos_class)
-			{ return 0; }
+{
+	switch (pm_qos_class) {
+	case PM_QOS_CPU_DMA_LATENCY:
+		return PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE;
+	case PM_QOS_NETWORK_LATENCY:
+		return PM_QOS_NETWORK_LAT_DEFAULT_VALUE;
+	case PM_QOS_NETWORK_THROUGHPUT:
+		return PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE;
+	default:
+		return PM_QOS_DEFAULT_VALUE;
+	}
+}
+
 static inline int pm_qos_add_notifier(int pm_qos_class,
 				      struct notifier_block *notifier)
 			{ return 0; }
-- 
cgit v1.2.3