From 49920bc66984a512f4bcc7735a61642cd0e4d6f2 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@linux.intel.com>
Date: Thu, 13 Oct 2011 15:15:27 +0530
Subject: dmaengine: add new enum dma_transfer_direction

This new enum removes usage of dma_data_direction for dma direction. The new
enum cleans tells the DMA direction and mode
This further paves way for merging the dmaengine _prep operations and also for
interleaved dma

Suggested-by: Jassi Brar <jaswinder.singh@linaro.org>
Reviewed-by: Barry Song <Baohua.Song@csr.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 include/linux/dmaengine.h | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index ace51af4369f..d946ef7f5e67 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -23,7 +23,6 @@
 
 #include <linux/device.h>
 #include <linux/uio.h>
-#include <linux/dma-direction.h>
 #include <linux/scatterlist.h>
 
 /**
@@ -75,6 +74,19 @@ enum dma_transaction_type {
 /* last transaction type for creation of the capabilities mask */
 #define DMA_TX_TYPE_END (DMA_CYCLIC + 1)
 
+/**
+ * enum dma_transfer_direction - dma transfer mode and direction indicator
+ * @DMA_MEM_TO_MEM: Async/Memcpy mode
+ * @DMA_MEM_TO_DEV: Slave mode & From Memory to Device
+ * @DMA_DEV_TO_MEM: Slave mode & From Device to Memory
+ * @DMA_DEV_TO_DEV: Slave mode & From Device to Device
+ */
+enum dma_transfer_direction {
+	DMA_MEM_TO_MEM,
+	DMA_MEM_TO_DEV,
+	DMA_DEV_TO_MEM,
+	DMA_DEV_TO_DEV,
+};
 
 /**
  * enum dma_ctrl_flags - DMA flags to augment operation preparation,
@@ -267,7 +279,7 @@ enum dma_slave_buswidth {
  * struct, if applicable.
  */
 struct dma_slave_config {
-	enum dma_data_direction direction;
+	enum dma_transfer_direction direction;
 	dma_addr_t src_addr;
 	dma_addr_t dst_addr;
 	enum dma_slave_buswidth src_addr_width;
@@ -490,11 +502,11 @@ struct dma_device {
 
 	struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)(
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction);
+		size_t period_len, enum dma_transfer_direction direction);
 	int (*device_control)(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		unsigned long arg);
 
@@ -520,7 +532,7 @@ static inline int dmaengine_slave_config(struct dma_chan *chan,
 
 static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single(
 	struct dma_chan *chan, void *buf, size_t len,
-	enum dma_data_direction dir, unsigned long flags)
+	enum dma_transfer_direction dir, unsigned long flags)
 {
 	struct scatterlist sg;
 	sg_init_one(&sg, buf, len);
-- 
cgit v1.2.3


From db8196df4bb6f117caa163aa73b0f16fd62290bd Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@linux.intel.com>
Date: Thu, 13 Oct 2011 22:34:23 +0530
Subject: dmaengine: move drivers to dma_transfer_direction

fixup usage of dma direction by introducing dma_transfer_direction,
this patch moves dma/drivers/* to use new enum

Cc: Jassi Brar <jaswinder.singh@linaro.org>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Viresh Kumar <viresh.kumar@st.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
Cc: Mika Westerberg <mika.westerberg@iki.fi>
Cc: H Hartley Sweeten <hartleys@visionengravers.com>
Cc: Li Yang <leoli@freescale.com>
Cc: Zhang Wei <zw@zh-kernel.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Cc: Shawn Guo <shawn.guo@freescale.com>
Cc: Yong Wang <yong.y.wang@intel.com>
Cc: Tomoya MORINAGA <tomoya-linux@dsn.lapis-semi.com>
Cc: Boojin Kim <boojin.kim@samsung.com>
Cc: Barry Song <Baohua.Song@csr.com>
Acked-by: Mika Westerberg <mika.westerberg@iki.fi>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@st.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 arch/arm/mach-ep93xx/include/mach/dma.h        |  6 +++---
 arch/arm/plat-nomadik/include/plat/ste_dma40.h |  4 ++--
 drivers/dma/amba-pl08x.c                       | 24 ++++++++++++------------
 drivers/dma/at_hdmac.c                         | 22 +++++++++++-----------
 drivers/dma/coh901318.c                        | 12 ++++++------
 drivers/dma/coh901318_lli.c                    | 23 +++++++++++------------
 drivers/dma/coh901318_lli.h                    |  4 ++--
 drivers/dma/dw_dmac.c                          | 14 +++++++-------
 drivers/dma/ep93xx_dma.c                       | 22 +++++++++++-----------
 drivers/dma/fsldma.c                           |  4 ++--
 drivers/dma/imx-dma.c                          | 10 +++++-----
 drivers/dma/imx-sdma.c                         | 10 +++++-----
 drivers/dma/intel_mid_dma.c                    | 14 +++++++-------
 drivers/dma/intel_mid_dma_regs.h               |  2 +-
 drivers/dma/ipu/ipu_idmac.c                    |  4 ++--
 drivers/dma/mxs-dma.c                          |  8 ++++----
 drivers/dma/pch_dma.c                          | 12 ++++++------
 drivers/dma/pl330.c                            | 18 +++++++++---------
 drivers/dma/shdma.c                            | 25 ++++++++++++-------------
 drivers/dma/ste_dma40.c                        | 26 +++++++++++++-------------
 drivers/dma/timb_dma.c                         | 18 +++++++++---------
 drivers/dma/txx9dmac.c                         | 12 ++++++------
 include/linux/amba/pl08x.h                     |  4 ++--
 include/linux/dw_dmac.h                        |  2 +-
 include/linux/sh_dma.h                         |  2 +-
 25 files changed, 150 insertions(+), 152 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-ep93xx/include/mach/dma.h b/arch/arm/mach-ep93xx/include/mach/dma.h
index 46d4d876e6fb..e82c642fa53c 100644
--- a/arch/arm/mach-ep93xx/include/mach/dma.h
+++ b/arch/arm/mach-ep93xx/include/mach/dma.h
@@ -37,7 +37,7 @@
  */
 struct ep93xx_dma_data {
 	int				port;
-	enum dma_data_direction		direction;
+	enum dma_transfer_direction	direction;
 	const char			*name;
 };
 
@@ -80,14 +80,14 @@ static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan)
  * channel supports given DMA direction. Only M2P channels have such
  * limitation, for M2M channels the direction is configurable.
  */
-static inline enum dma_data_direction
+static inline enum dma_transfer_direction
 ep93xx_dma_chan_direction(struct dma_chan *chan)
 {
 	if (!ep93xx_dma_chan_is_m2p(chan))
 		return DMA_NONE;
 
 	/* even channels are for TX, odd for RX */
-	return (chan->chan_id % 2 == 0) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+	return (chan->chan_id % 2 == 0) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
 }
 
 #endif /* __ASM_ARCH_DMA_H */
diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h
index 685c78716d95..38b041a40db4 100644
--- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h
+++ b/arch/arm/plat-nomadik/include/plat/ste_dma40.h
@@ -187,7 +187,7 @@ static inline struct
 dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan,
 					    dma_addr_t addr,
 					    unsigned int size,
-					    enum dma_data_direction direction,
+					    enum dma_transfer_direction direction,
 					    unsigned long flags)
 {
 	struct scatterlist sg;
@@ -209,7 +209,7 @@ static inline struct
 dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan,
 					    dma_addr_t addr,
 					    unsigned int size,
-					    enum dma_data_direction direction,
+					    enum dma_transfer_direction direction,
 					    unsigned long flags)
 {
 	return NULL;
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index b7cbd1ab1db1..41c62fd0680d 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -882,9 +882,9 @@ static int prep_phy_channel(struct pl08x_dma_chan *plchan,
 		ch->signal = ret;
 
 		/* Assign the flow control signal to this channel */
-		if (txd->direction == DMA_TO_DEVICE)
+		if (txd->direction == DMA_MEM_TO_DEV)
 			txd->ccfg |= ch->signal << PL080_CONFIG_DST_SEL_SHIFT;
-		else if (txd->direction == DMA_FROM_DEVICE)
+		else if (txd->direction == DMA_DEV_TO_MEM)
 			txd->ccfg |= ch->signal << PL080_CONFIG_SRC_SEL_SHIFT;
 	}
 
@@ -1102,10 +1102,10 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 
 	/* Transfer direction */
 	plchan->runtime_direction = config->direction;
-	if (config->direction == DMA_TO_DEVICE) {
+	if (config->direction == DMA_MEM_TO_DEV) {
 		addr_width = config->dst_addr_width;
 		maxburst = config->dst_maxburst;
-	} else if (config->direction == DMA_FROM_DEVICE) {
+	} else if (config->direction == DMA_DEV_TO_MEM) {
 		addr_width = config->src_addr_width;
 		maxburst = config->src_maxburst;
 	} else {
@@ -1136,7 +1136,7 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 	cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT;
 	cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT;
 
-	if (plchan->runtime_direction == DMA_FROM_DEVICE) {
+	if (plchan->runtime_direction == DMA_DEV_TO_MEM) {
 		plchan->src_addr = config->src_addr;
 		plchan->src_cctl = pl08x_cctl(cctl) | PL080_CONTROL_DST_INCR |
 			pl08x_select_bus(plchan->cd->periph_buses,
@@ -1152,7 +1152,7 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 		"configured channel %s (%s) for %s, data width %d, "
 		"maxburst %d words, LE, CCTL=0x%08x\n",
 		dma_chan_name(chan), plchan->name,
-		(config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
+		(config->direction == DMA_DEV_TO_MEM) ? "RX" : "TX",
 		addr_width,
 		maxburst,
 		cctl);
@@ -1322,7 +1322,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
 
 static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
@@ -1354,10 +1354,10 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 	 */
 	txd->direction = direction;
 
-	if (direction == DMA_TO_DEVICE) {
+	if (direction == DMA_MEM_TO_DEV) {
 		txd->cctl = plchan->dst_cctl;
 		slave_addr = plchan->dst_addr;
-	} else if (direction == DMA_FROM_DEVICE) {
+	} else if (direction == DMA_DEV_TO_MEM) {
 		txd->cctl = plchan->src_cctl;
 		slave_addr = plchan->src_addr;
 	} else {
@@ -1368,10 +1368,10 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 	}
 
 	if (plchan->cd->device_fc)
-		tmp = (direction == DMA_TO_DEVICE) ? PL080_FLOW_MEM2PER_PER :
+		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER :
 			PL080_FLOW_PER2MEM_PER;
 	else
-		tmp = (direction == DMA_TO_DEVICE) ? PL080_FLOW_MEM2PER :
+		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER :
 			PL080_FLOW_PER2MEM;
 
 	txd->ccfg |= tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT;
@@ -1387,7 +1387,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 		list_add_tail(&dsg->node, &txd->dsg_list);
 
 		dsg->len = sg_dma_len(sg);
-		if (direction == DMA_TO_DEVICE) {
+		if (direction == DMA_MEM_TO_DEV) {
 			dsg->src_addr = sg_phys(sg);
 			dsg->dst_addr = slave_addr;
 		} else {
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index fcfa0a8b5c59..7e76574e83ec 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -660,7 +660,7 @@ err_desc_get:
  */
 static struct dma_async_tx_descriptor *
 atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
@@ -678,7 +678,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 	dev_vdbg(chan2dev(chan), "prep_slave_sg (%d): %s f0x%lx\n",
 			sg_len,
-			direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE",
+			direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
 			flags);
 
 	if (unlikely(!atslave || !sg_len)) {
@@ -692,7 +692,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	ctrlb = ATC_IEN;
 
 	switch (direction) {
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		ctrla |=  ATC_DST_WIDTH(reg_width);
 		ctrlb |=  ATC_DST_ADDR_MODE_FIXED
 			| ATC_SRC_ADDR_MODE_INCR
@@ -725,7 +725,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			total_len += len;
 		}
 		break;
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		ctrla |=  ATC_SRC_WIDTH(reg_width);
 		ctrlb |=  ATC_DST_ADDR_MODE_INCR
 			| ATC_SRC_ADDR_MODE_FIXED
@@ -787,7 +787,7 @@ err_desc_get:
  */
 static int
 atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	if (period_len > (ATC_BTSIZE_MAX << reg_width))
 		goto err_out;
@@ -795,7 +795,7 @@ atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr,
 		goto err_out;
 	if (unlikely(buf_addr & ((1 << reg_width) - 1)))
 		goto err_out;
-	if (unlikely(!(direction & (DMA_TO_DEVICE | DMA_FROM_DEVICE))))
+	if (unlikely(!(direction & (DMA_DEV_TO_MEM | DMA_MEM_TO_DEV))))
 		goto err_out;
 
 	return 0;
@@ -810,7 +810,7 @@ err_out:
 static int
 atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
 		unsigned int period_index, dma_addr_t buf_addr,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	u32		ctrla;
 	unsigned int	reg_width = atslave->reg_width;
@@ -822,7 +822,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
 		| period_len >> reg_width;
 
 	switch (direction) {
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		desc->lli.saddr = buf_addr + (period_len * period_index);
 		desc->lli.daddr = atslave->tx_reg;
 		desc->lli.ctrla = ctrla;
@@ -833,7 +833,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
 				| ATC_DIF(AT_DMA_PER_IF);
 		break;
 
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		desc->lli.saddr = atslave->rx_reg;
 		desc->lli.daddr = buf_addr + (period_len * period_index);
 		desc->lli.ctrla = ctrla;
@@ -861,7 +861,7 @@ atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc,
  */
 static struct dma_async_tx_descriptor *
 atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma_slave	*atslave = chan->private;
@@ -872,7 +872,7 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 	unsigned int		i;
 
 	dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@0x%08x - %d (%d/%d)\n",
-			direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE",
+			direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
 			buf_addr,
 			periods, buf_len, period_len);
 
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index 4234f416ef11..d65a718c0f9b 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -39,7 +39,7 @@ struct coh901318_desc {
 	struct scatterlist *sg;
 	unsigned int sg_len;
 	struct coh901318_lli *lli;
-	enum dma_data_direction dir;
+	enum dma_transfer_direction dir;
 	unsigned long flags;
 	u32 head_config;
 	u32 head_ctrl;
@@ -1034,7 +1034,7 @@ coh901318_prep_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 
 static struct dma_async_tx_descriptor *
 coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-			unsigned int sg_len, enum dma_data_direction direction,
+			unsigned int sg_len, enum dma_transfer_direction direction,
 			unsigned long flags)
 {
 	struct coh901318_chan *cohc = to_coh901318_chan(chan);
@@ -1077,7 +1077,7 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	ctrl_last |= cohc->runtime_ctrl;
 	ctrl |= cohc->runtime_ctrl;
 
-	if (direction == DMA_TO_DEVICE) {
+	if (direction == DMA_MEM_TO_DEV) {
 		u32 tx_flags = COH901318_CX_CTRL_PRDD_SOURCE |
 			COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE;
 
@@ -1085,7 +1085,7 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		ctrl_chained |= tx_flags;
 		ctrl_last |= tx_flags;
 		ctrl |= tx_flags;
-	} else if (direction == DMA_FROM_DEVICE) {
+	} else if (direction == DMA_DEV_TO_MEM) {
 		u32 rx_flags = COH901318_CX_CTRL_PRDD_DEST |
 			COH901318_CX_CTRL_DST_ADDR_INC_ENABLE;
 
@@ -1274,11 +1274,11 @@ static void coh901318_dma_set_runtimeconfig(struct dma_chan *chan,
 	int i = 0;
 
 	/* We only support mem to per or per to mem transfers */
-	if (config->direction == DMA_FROM_DEVICE) {
+	if (config->direction == DMA_DEV_TO_MEM) {
 		addr = config->src_addr;
 		addr_width = config->src_addr_width;
 		maxburst = config->src_maxburst;
-	} else if (config->direction == DMA_TO_DEVICE) {
+	} else if (config->direction == DMA_MEM_TO_DEV) {
 		addr = config->dst_addr;
 		addr_width = config->dst_addr_width;
 		maxburst = config->dst_maxburst;
diff --git a/drivers/dma/coh901318_lli.c b/drivers/dma/coh901318_lli.c
index 9f7e0e6a7eea..6c0e2d4c6682 100644
--- a/drivers/dma/coh901318_lli.c
+++ b/drivers/dma/coh901318_lli.c
@@ -7,11 +7,10 @@
  * Author: Per Friden <per.friden@stericsson.com>
  */
 
-#include <linux/dma-mapping.h>
 #include <linux/spinlock.h>
-#include <linux/dmapool.h>
 #include <linux/memory.h>
 #include <linux/gfp.h>
+#include <linux/dmapool.h>
 #include <mach/coh901318.h>
 
 #include "coh901318_lli.h"
@@ -177,18 +176,18 @@ coh901318_lli_fill_single(struct coh901318_pool *pool,
 			  struct coh901318_lli *lli,
 			  dma_addr_t buf, unsigned int size,
 			  dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_eom,
-			  enum dma_data_direction dir)
+			  enum dma_transfer_direction dir)
 {
 	int s = size;
 	dma_addr_t src;
 	dma_addr_t dst;
 
 
-	if (dir == DMA_TO_DEVICE) {
+	if (dir == DMA_MEM_TO_DEV) {
 		src = buf;
 		dst = dev_addr;
 
-	} else if (dir == DMA_FROM_DEVICE) {
+	} else if (dir == DMA_DEV_TO_MEM) {
 
 		src = dev_addr;
 		dst = buf;
@@ -215,9 +214,9 @@ coh901318_lli_fill_single(struct coh901318_pool *pool,
 
 		lli = coh901318_lli_next(lli);
 
-		if (dir == DMA_TO_DEVICE)
+		if (dir == DMA_MEM_TO_DEV)
 			src += block_size;
-		else if (dir == DMA_FROM_DEVICE)
+		else if (dir == DMA_DEV_TO_MEM)
 			dst += block_size;
 	}
 
@@ -234,7 +233,7 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 		      struct scatterlist *sgl, unsigned int nents,
 		      dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl,
 		      u32 ctrl_last,
-		      enum dma_data_direction dir, u32 ctrl_irq_mask)
+		      enum dma_transfer_direction dir, u32 ctrl_irq_mask)
 {
 	int i;
 	struct scatterlist *sg;
@@ -249,9 +248,9 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 
 	spin_lock(&pool->lock);
 
-	if (dir == DMA_TO_DEVICE)
+	if (dir == DMA_MEM_TO_DEV)
 		dst = dev_addr;
-	else if (dir == DMA_FROM_DEVICE)
+	else if (dir == DMA_DEV_TO_MEM)
 		src = dev_addr;
 	else
 		goto err;
@@ -269,7 +268,7 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 			ctrl_sg = ctrl ? ctrl : ctrl_last;
 
 
-		if (dir == DMA_TO_DEVICE)
+		if (dir == DMA_MEM_TO_DEV)
 			/* increment source address */
 			src = sg_phys(sg);
 		else
@@ -293,7 +292,7 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 			lli->src_addr = src;
 			lli->dst_addr = dst;
 
-			if (dir == DMA_FROM_DEVICE)
+			if (dir == DMA_DEV_TO_MEM)
 				dst += elem_size;
 			else
 				src += elem_size;
diff --git a/drivers/dma/coh901318_lli.h b/drivers/dma/coh901318_lli.h
index 7a5c80990e9e..abff3714fdda 100644
--- a/drivers/dma/coh901318_lli.h
+++ b/drivers/dma/coh901318_lli.h
@@ -97,7 +97,7 @@ coh901318_lli_fill_single(struct coh901318_pool *pool,
 			  struct coh901318_lli *lli,
 			  dma_addr_t buf, unsigned int size,
 			  dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_last,
-			  enum dma_data_direction dir);
+			  enum dma_transfer_direction dir);
 
 /**
  * coh901318_lli_fill_single() - Prepares the lli:s for dma scatter list transfer
@@ -119,6 +119,6 @@ coh901318_lli_fill_sg(struct coh901318_pool *pool,
 		      struct scatterlist *sg, unsigned int nents,
 		      dma_addr_t dev_addr, u32 ctrl_chained,
 		      u32 ctrl, u32 ctrl_last,
-		      enum dma_data_direction dir, u32 ctrl_irq_mask);
+		      enum dma_transfer_direction dir, u32 ctrl_irq_mask);
 
 #endif /* COH901318_LLI_H */
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 9bfd6d360718..decca1c3c83d 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -696,7 +696,7 @@ err_desc_get:
 
 static struct dma_async_tx_descriptor *
 dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
@@ -720,7 +720,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	prev = first = NULL;
 
 	switch (direction) {
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		ctllo = (DWC_DEFAULT_CTLLO(chan->private)
 				| DWC_CTLL_DST_WIDTH(reg_width)
 				| DWC_CTLL_DST_FIX
@@ -777,7 +777,7 @@ slave_sg_todev_fill_desc:
 				goto slave_sg_todev_fill_desc;
 		}
 		break;
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		ctllo = (DWC_DEFAULT_CTLLO(chan->private)
 				| DWC_CTLL_SRC_WIDTH(reg_width)
 				| DWC_CTLL_DST_INC
@@ -1165,7 +1165,7 @@ EXPORT_SYMBOL(dw_dma_cyclic_stop);
  */
 struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 		dma_addr_t buf_addr, size_t buf_len, size_t period_len,
-		enum dma_data_direction direction)
+		enum dma_transfer_direction direction)
 {
 	struct dw_dma_chan		*dwc = to_dw_dma_chan(chan);
 	struct dw_cyclic_desc		*cdesc;
@@ -1206,7 +1206,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 		goto out_err;
 	if (unlikely(buf_addr & ((1 << reg_width) - 1)))
 		goto out_err;
-	if (unlikely(!(direction & (DMA_TO_DEVICE | DMA_FROM_DEVICE))))
+	if (unlikely(!(direction & (DMA_MEM_TO_DEV | DMA_DEV_TO_MEM))))
 		goto out_err;
 
 	retval = ERR_PTR(-ENOMEM);
@@ -1228,7 +1228,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 			goto out_err_desc_get;
 
 		switch (direction) {
-		case DMA_TO_DEVICE:
+		case DMA_MEM_TO_DEV:
 			desc->lli.dar = dws->tx_reg;
 			desc->lli.sar = buf_addr + (period_len * i);
 			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private)
@@ -1239,7 +1239,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 					| DWC_CTLL_FC(dws->fc)
 					| DWC_CTLL_INT_EN);
 			break;
-		case DMA_FROM_DEVICE:
+		case DMA_DEV_TO_MEM:
 			desc->lli.dar = buf_addr + (period_len * i);
 			desc->lli.sar = dws->rx_reg;
 			desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private)
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index b47e2b803faf..009851b2aeea 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -330,7 +330,7 @@ static void m2p_fill_desc(struct ep93xx_dma_chan *edmac)
 	struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac);
 	u32 bus_addr;
 
-	if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_TO_DEVICE)
+	if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_MEM_TO_DEV)
 		bus_addr = desc->src_addr;
 	else
 		bus_addr = desc->dst_addr;
@@ -443,7 +443,7 @@ static int m2m_hw_setup(struct ep93xx_dma_chan *edmac)
 		control = (5 << M2M_CONTROL_PWSC_SHIFT);
 		control |= M2M_CONTROL_NO_HDSK;
 
-		if (data->direction == DMA_TO_DEVICE) {
+		if (data->direction == DMA_MEM_TO_DEV) {
 			control |= M2M_CONTROL_DAH;
 			control |= M2M_CONTROL_TM_TX;
 			control |= M2M_CONTROL_RSS_SSPTX;
@@ -463,7 +463,7 @@ static int m2m_hw_setup(struct ep93xx_dma_chan *edmac)
 		control |= M2M_CONTROL_RSS_IDE;
 		control |= M2M_CONTROL_PW_16;
 
-		if (data->direction == DMA_TO_DEVICE) {
+		if (data->direction == DMA_MEM_TO_DEV) {
 			/* Worst case from the UG */
 			control = (3 << M2M_CONTROL_PWSC_SHIFT);
 			control |= M2M_CONTROL_DAH;
@@ -803,8 +803,8 @@ static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan)
 			switch (data->port) {
 			case EP93XX_DMA_SSP:
 			case EP93XX_DMA_IDE:
-				if (data->direction != DMA_TO_DEVICE &&
-				    data->direction != DMA_FROM_DEVICE)
+				if (data->direction != DMA_MEM_TO_DEV &&
+				    data->direction != DMA_DEV_TO_MEM)
 					return -EINVAL;
 				break;
 			default:
@@ -952,7 +952,7 @@ fail:
  */
 static struct dma_async_tx_descriptor *
 ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-			 unsigned int sg_len, enum dma_data_direction dir,
+			 unsigned int sg_len, enum dma_transfer_direction dir,
 			 unsigned long flags)
 {
 	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
@@ -988,7 +988,7 @@ ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			goto fail;
 		}
 
-		if (dir == DMA_TO_DEVICE) {
+		if (dir == DMA_MEM_TO_DEV) {
 			desc->src_addr = sg_dma_address(sg);
 			desc->dst_addr = edmac->runtime_addr;
 		} else {
@@ -1032,7 +1032,7 @@ fail:
 static struct dma_async_tx_descriptor *
 ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 			   size_t buf_len, size_t period_len,
-			   enum dma_data_direction dir)
+			   enum dma_transfer_direction dir)
 {
 	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
 	struct ep93xx_dma_desc *desc, *first;
@@ -1065,7 +1065,7 @@ ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 			goto fail;
 		}
 
-		if (dir == DMA_TO_DEVICE) {
+		if (dir == DMA_MEM_TO_DEV) {
 			desc->src_addr = dma_addr + offset;
 			desc->dst_addr = edmac->runtime_addr;
 		} else {
@@ -1133,12 +1133,12 @@ static int ep93xx_dma_slave_config(struct ep93xx_dma_chan *edmac,
 		return -EINVAL;
 
 	switch (config->direction) {
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		width = config->src_addr_width;
 		addr = config->src_addr;
 		break;
 
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		width = config->dst_addr_width;
 		addr = config->dst_addr;
 		break;
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 8a781540590c..b98070c33ca9 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -772,7 +772,7 @@ fail:
  */
 static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
 	struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_data_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	/*
 	 * This operation is not supported on the Freescale DMA controller
@@ -819,7 +819,7 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 			return -ENXIO;
 
 		/* we set the controller burst size depending on direction */
-		if (config->direction == DMA_TO_DEVICE)
+		if (config->direction == DMA_MEM_TO_DEV)
 			size = config->dst_addr_width * config->dst_maxburst;
 		else
 			size = config->src_addr_width * config->src_maxburst;
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index d746899f36e1..678cd01dc42c 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -106,7 +106,7 @@ static int imxdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		imx_dma_disable(imxdmac->imxdma_channel);
 		return 0;
 	case DMA_SLAVE_CONFIG:
-		if (dmaengine_cfg->direction == DMA_FROM_DEVICE) {
+		if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
 			imxdmac->per_address = dmaengine_cfg->src_addr;
 			imxdmac->watermark_level = dmaengine_cfg->src_maxburst;
 			imxdmac->word_size = dmaengine_cfg->src_addr_width;
@@ -223,7 +223,7 @@ static void imxdma_free_chan_resources(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
@@ -240,7 +240,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 		dma_length += sg->length;
 	}
 
-	if (direction == DMA_FROM_DEVICE)
+	if (direction == DMA_DEV_TO_MEM)
 		dmamode = DMA_MODE_READ;
 	else
 		dmamode = DMA_MODE_WRITE;
@@ -270,7 +270,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 
 static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
 	struct imxdma_engine *imxdma = imxdmac->imxdma;
@@ -316,7 +316,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
 	imxdmac->sg_list[periods].page_link =
 		((unsigned long)imxdmac->sg_list | 0x01) & ~0x02;
 
-	if (direction == DMA_FROM_DEVICE)
+	if (direction == DMA_DEV_TO_MEM)
 		dmamode = DMA_MODE_READ;
 	else
 		dmamode = DMA_MODE_WRITE;
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index eab1fe71259e..065de5442c93 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -246,7 +246,7 @@ struct sdma_engine;
 struct sdma_channel {
 	struct sdma_engine		*sdma;
 	unsigned int			channel;
-	enum dma_data_direction		direction;
+	enum dma_transfer_direction		direction;
 	enum sdma_peripheral_type	peripheral_type;
 	unsigned int			event_id0;
 	unsigned int			event_id1;
@@ -649,7 +649,7 @@ static int sdma_load_context(struct sdma_channel *sdmac)
 	struct sdma_buffer_descriptor *bd0 = sdma->channel[0].bd;
 	int ret;
 
-	if (sdmac->direction == DMA_FROM_DEVICE) {
+	if (sdmac->direction == DMA_DEV_TO_MEM) {
 		load_address = sdmac->pc_from_device;
 	} else {
 		load_address = sdmac->pc_to_device;
@@ -910,7 +910,7 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
@@ -1007,7 +1007,7 @@ err_out:
 
 static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
@@ -1092,7 +1092,7 @@ static int sdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		sdma_disable_channel(sdmac);
 		return 0;
 	case DMA_SLAVE_CONFIG:
-		if (dmaengine_cfg->direction == DMA_FROM_DEVICE) {
+		if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
 			sdmac->per_address = dmaengine_cfg->src_addr;
 			sdmac->watermark_level = dmaengine_cfg->src_maxburst;
 			sdmac->word_size = dmaengine_cfg->src_addr_width;
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index 9e96c43a846a..6deda25fd0a8 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -394,10 +394,10 @@ static int midc_lli_fill_sg(struct intel_mid_dma_chan *midc,
 							midc->dma->block_size);
 		/*Populate SAR and DAR values*/
 		sg_phy_addr = sg_phys(sg);
-		if (desc->dirn ==  DMA_TO_DEVICE) {
+		if (desc->dirn ==  DMA_MEM_TO_DEV) {
 			lli_bloc_desc->sar  = sg_phy_addr;
 			lli_bloc_desc->dar  = mids->dma_slave.dst_addr;
-		} else if (desc->dirn ==  DMA_FROM_DEVICE) {
+		} else if (desc->dirn ==  DMA_DEV_TO_MEM) {
 			lli_bloc_desc->sar  = mids->dma_slave.src_addr;
 			lli_bloc_desc->dar  = sg_phy_addr;
 		}
@@ -631,13 +631,13 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 		if (midc->dma->pimr_mask) {
 			cfg_hi.cfgx.protctl = 0x0; /*default value*/
 			cfg_hi.cfgx.fifo_mode = 1;
-			if (mids->dma_slave.direction == DMA_TO_DEVICE) {
+			if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
 				cfg_hi.cfgx.src_per = 0;
 				if (mids->device_instance == 0)
 					cfg_hi.cfgx.dst_per = 3;
 				if (mids->device_instance == 1)
 					cfg_hi.cfgx.dst_per = 1;
-			} else if (mids->dma_slave.direction == DMA_FROM_DEVICE) {
+			} else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
 				if (mids->device_instance == 0)
 					cfg_hi.cfgx.src_per = 2;
 				if (mids->device_instance == 1)
@@ -681,11 +681,11 @@ static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
 		ctl_lo.ctlx.sinc = 0;
 		ctl_lo.ctlx.dinc = 0;
 	} else {
-		if (mids->dma_slave.direction == DMA_TO_DEVICE) {
+		if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
 			ctl_lo.ctlx.sinc = 0;
 			ctl_lo.ctlx.dinc = 2;
 			ctl_lo.ctlx.tt_fc = 1;
-		} else if (mids->dma_slave.direction == DMA_FROM_DEVICE) {
+		} else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
 			ctl_lo.ctlx.sinc = 2;
 			ctl_lo.ctlx.dinc = 0;
 			ctl_lo.ctlx.tt_fc = 2;
@@ -731,7 +731,7 @@ err_desc_get:
  */
 static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
 			struct dma_chan *chan, struct scatterlist *sgl,
-			unsigned int sg_len, enum dma_data_direction direction,
+			unsigned int sg_len, enum dma_transfer_direction direction,
 			unsigned long flags)
 {
 	struct intel_mid_dma_chan *midc = NULL;
diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h
index aea5ee88ce03..c6de919a6401 100644
--- a/drivers/dma/intel_mid_dma_regs.h
+++ b/drivers/dma/intel_mid_dma_regs.h
@@ -262,7 +262,7 @@ struct intel_mid_dma_desc {
 	unsigned int			lli_length;
 	unsigned int			current_lli;
 	dma_addr_t			next;
-	enum dma_data_direction		dirn;
+	enum dma_transfer_direction		dirn;
 	enum dma_status			status;
 	enum dma_slave_buswidth		width; /*width of DMA txn*/
 	enum intel_mid_dma_mode		cfg_mode; /*mode configuration*/
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index 6815905a772f..0cee3b30cd77 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -1362,7 +1362,7 @@ static void ipu_gc_tasklet(unsigned long arg)
 /* Allocate and initialise a transfer descriptor. */
 static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan,
 		struct scatterlist *sgl, unsigned int sg_len,
-		enum dma_data_direction direction, unsigned long tx_flags)
+		enum dma_transfer_direction direction, unsigned long tx_flags)
 {
 	struct idmac_channel *ichan = to_idmac_chan(chan);
 	struct idmac_tx_desc *desc = NULL;
@@ -1374,7 +1374,7 @@ static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan
 	    chan->chan_id != IDMAC_IC_7)
 		return NULL;
 
-	if (direction != DMA_FROM_DEVICE && direction != DMA_TO_DEVICE) {
+	if (direction != DMA_DEV_TO_MEM && direction != DMA_MEM_TO_DEV) {
 		dev_err(chan->device->dev, "Invalid DMA direction %d!\n", direction);
 		return NULL;
 	}
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index b4588bdd98bb..bdf4672b2553 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -377,7 +377,7 @@ static void mxs_dma_free_chan_resources(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
 		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long append)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
@@ -450,7 +450,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
 			ccw->bits |= CCW_CHAIN;
 			ccw->bits |= CCW_HALT_ON_TERM;
 			ccw->bits |= CCW_TERM_FLUSH;
-			ccw->bits |= BF_CCW(direction == DMA_FROM_DEVICE ?
+			ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
 					MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ,
 					COMMAND);
 
@@ -472,7 +472,7 @@ err_out:
 
 static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
@@ -515,7 +515,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
 		ccw->bits |= CCW_IRQ;
 		ccw->bits |= CCW_HALT_ON_TERM;
 		ccw->bits |= CCW_TERM_FLUSH;
-		ccw->bits |= BF_CCW(direction == DMA_FROM_DEVICE ?
+		ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
 				MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND);
 
 		dma_addr += period_len;
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index a6d0e3dbed07..9944e8295498 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -99,7 +99,7 @@ struct pch_dma_desc {
 struct pch_dma_chan {
 	struct dma_chan		chan;
 	void __iomem *membase;
-	enum dma_data_direction	dir;
+	enum dma_transfer_direction dir;
 	struct tasklet_struct	tasklet;
 	unsigned long		err_status;
 
@@ -224,7 +224,7 @@ static void pdc_set_dir(struct dma_chan *chan)
 		mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
 				       (DMA_CTL0_BITS_PER_CH * chan->chan_id));
 		val &= mask_mode;
-		if (pd_chan->dir == DMA_TO_DEVICE)
+		if (pd_chan->dir == DMA_MEM_TO_DEV)
 			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
 				       DMA_CTL0_DIR_SHIFT_BITS);
 		else
@@ -242,7 +242,7 @@ static void pdc_set_dir(struct dma_chan *chan)
 		mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
 						 (DMA_CTL0_BITS_PER_CH * ch));
 		val &= mask_mode;
-		if (pd_chan->dir == DMA_TO_DEVICE)
+		if (pd_chan->dir == DMA_MEM_TO_DEV)
 			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch +
 				       DMA_CTL0_DIR_SHIFT_BITS);
 		else
@@ -607,7 +607,7 @@ static void pd_issue_pending(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan,
 			struct scatterlist *sgl, unsigned int sg_len,
-			enum dma_data_direction direction, unsigned long flags)
+			enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
 	struct pch_dma_slave *pd_slave = chan->private;
@@ -623,9 +623,9 @@ static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan,
 		return NULL;
 	}
 
-	if (direction == DMA_FROM_DEVICE)
+	if (direction == DMA_DEV_TO_MEM)
 		reg = pd_slave->rx_reg;
-	else if (direction == DMA_TO_DEVICE)
+	else if (direction == DMA_MEM_TO_DEV)
 		reg = pd_slave->tx_reg;
 	else
 		return NULL;
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 571041477ab2..1e58eeb030d8 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -320,14 +320,14 @@ static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned
 	case DMA_SLAVE_CONFIG:
 		slave_config = (struct dma_slave_config *)arg;
 
-		if (slave_config->direction == DMA_TO_DEVICE) {
+		if (slave_config->direction == DMA_MEM_TO_DEV) {
 			if (slave_config->dst_addr)
 				pch->fifo_addr = slave_config->dst_addr;
 			if (slave_config->dst_addr_width)
 				pch->burst_sz = __ffs(slave_config->dst_addr_width);
 			if (slave_config->dst_maxburst)
 				pch->burst_len = slave_config->dst_maxburst;
-		} else if (slave_config->direction == DMA_FROM_DEVICE) {
+		} else if (slave_config->direction == DMA_DEV_TO_MEM) {
 			if (slave_config->src_addr)
 				pch->fifo_addr = slave_config->src_addr;
 			if (slave_config->src_addr_width)
@@ -597,7 +597,7 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len)
 
 static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t len,
-		size_t period_len, enum dma_data_direction direction)
+		size_t period_len, enum dma_transfer_direction direction)
 {
 	struct dma_pl330_desc *desc;
 	struct dma_pl330_chan *pch = to_pchan(chan);
@@ -612,13 +612,13 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 	}
 
 	switch (direction) {
-	case DMA_TO_DEVICE:
+	case DMA_MEM_TO_DEV:
 		desc->rqcfg.src_inc = 1;
 		desc->rqcfg.dst_inc = 0;
 		src = dma_addr;
 		dst = pch->fifo_addr;
 		break;
-	case DMA_FROM_DEVICE:
+	case DMA_DEV_TO_MEM:
 		desc->rqcfg.src_inc = 0;
 		desc->rqcfg.dst_inc = 1;
 		src = pch->fifo_addr;
@@ -687,7 +687,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
 
 static struct dma_async_tx_descriptor *
 pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flg)
 {
 	struct dma_pl330_desc *first, *desc = NULL;
@@ -702,9 +702,9 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		return NULL;
 
 	/* Make sure the direction is consistent */
-	if ((direction == DMA_TO_DEVICE &&
+	if ((direction == DMA_MEM_TO_DEV &&
 				peri->rqtype != MEMTODEV) ||
-			(direction == DMA_FROM_DEVICE &&
+			(direction == DMA_DEV_TO_MEM &&
 				peri->rqtype != DEVTOMEM)) {
 		dev_err(pch->dmac->pif.dev, "%s:%d Invalid Direction\n",
 				__func__, __LINE__);
@@ -747,7 +747,7 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		else
 			list_add_tail(&desc->node, &first->node);
 
-		if (direction == DMA_TO_DEVICE) {
+		if (direction == DMA_MEM_TO_DEV) {
 			desc->rqcfg.src_inc = 1;
 			desc->rqcfg.dst_inc = 0;
 			fill_px(&desc->px,
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 81809c2b46ab..592304fb41a6 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -23,7 +23,6 @@
 #include <linux/interrupt.h>
 #include <linux/dmaengine.h>
 #include <linux/delay.h>
-#include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/sh_dma.h>
@@ -479,19 +478,19 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan)
  * @sh_chan:	DMA channel
  * @flags:	DMA transfer flags
  * @dest:	destination DMA address, incremented when direction equals
- *		DMA_FROM_DEVICE or DMA_BIDIRECTIONAL
+ *		DMA_DEV_TO_MEM
  * @src:	source DMA address, incremented when direction equals
- *		DMA_TO_DEVICE or DMA_BIDIRECTIONAL
+ *		DMA_MEM_TO_DEV
  * @len:	DMA transfer length
  * @first:	if NULL, set to the current descriptor and cookie set to -EBUSY
  * @direction:	needed for slave DMA to decide which address to keep constant,
- *		equals DMA_BIDIRECTIONAL for MEMCPY
+ *		equals DMA_MEM_TO_MEM for MEMCPY
  * Returns 0 or an error
  * Locks: called with desc_lock held
  */
 static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
 	unsigned long flags, dma_addr_t *dest, dma_addr_t *src, size_t *len,
-	struct sh_desc **first, enum dma_data_direction direction)
+	struct sh_desc **first, enum dma_transfer_direction direction)
 {
 	struct sh_desc *new;
 	size_t copy_size;
@@ -531,9 +530,9 @@ static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
 	new->direction = direction;
 
 	*len -= copy_size;
-	if (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE)
+	if (direction == DMA_MEM_TO_MEM || direction == DMA_MEM_TO_DEV)
 		*src += copy_size;
-	if (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE)
+	if (direction == DMA_MEM_TO_MEM || direction == DMA_DEV_TO_MEM)
 		*dest += copy_size;
 
 	return new;
@@ -546,12 +545,12 @@ static struct sh_desc *sh_dmae_add_desc(struct sh_dmae_chan *sh_chan,
  * converted to scatter-gather to guarantee consistent locking and a correct
  * list manipulation. For slave DMA direction carries the usual meaning, and,
  * logically, the SG list is RAM and the addr variable contains slave address,
- * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_BIDIRECTIONAL
+ * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_MEM_TO_MEM
  * and the SG list contains only one element and points at the source buffer.
  */
 static struct dma_async_tx_descriptor *sh_dmae_prep_sg(struct sh_dmae_chan *sh_chan,
 	struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr,
-	enum dma_data_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct scatterlist *sg;
 	struct sh_desc *first = NULL, *new = NULL /* compiler... */;
@@ -592,7 +591,7 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_sg(struct sh_dmae_chan *sh_c
 			dev_dbg(sh_chan->dev, "Add SG #%d@%p[%d], dma %llx\n",
 				i, sg, len, (unsigned long long)sg_addr);
 
-			if (direction == DMA_FROM_DEVICE)
+			if (direction == DMA_DEV_TO_MEM)
 				new = sh_dmae_add_desc(sh_chan, flags,
 						&sg_addr, addr, &len, &first,
 						direction);
@@ -646,13 +645,13 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
 	sg_dma_address(&sg) = dma_src;
 	sg_dma_len(&sg) = len;
 
-	return sh_dmae_prep_sg(sh_chan, &sg, 1, &dma_dest, DMA_BIDIRECTIONAL,
+	return sh_dmae_prep_sg(sh_chan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM,
 			       flags);
 }
 
 static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg(
 	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_data_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct sh_dmae_slave *param;
 	struct sh_dmae_chan *sh_chan;
@@ -996,7 +995,7 @@ static void dmae_do_tasklet(unsigned long data)
 	spin_lock_irq(&sh_chan->desc_lock);
 	list_for_each_entry(desc, &sh_chan->ld_queue, node) {
 		if (desc->mark == DESC_SUBMITTED &&
-		    ((desc->direction == DMA_FROM_DEVICE &&
+		    ((desc->direction == DMA_DEV_TO_MEM &&
 		      (desc->hw.dar + desc->hw.tcr) == dar_buf) ||
 		     (desc->hw.sar + desc->hw.tcr) == sar_buf)) {
 			dev_dbg(sh_chan->dev, "done #%d@%p dst %u\n",
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 467e4dcb20a0..0c6cbacb8321 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -216,7 +216,7 @@ struct d40_chan {
 	struct d40_log_lli_full		*lcpa;
 	/* Runtime reconfiguration */
 	dma_addr_t			runtime_addr;
-	enum dma_data_direction		runtime_direction;
+	enum dma_transfer_direction	runtime_direction;
 };
 
 /**
@@ -1854,7 +1854,7 @@ err:
 }
 
 static dma_addr_t
-d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction)
+d40_get_dev_addr(struct d40_chan *chan, enum dma_transfer_direction direction)
 {
 	struct stedma40_platform_data *plat = chan->base->plat_data;
 	struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
@@ -1863,9 +1863,9 @@ d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction)
 	if (chan->runtime_addr)
 		return chan->runtime_addr;
 
-	if (direction == DMA_FROM_DEVICE)
+	if (direction == DMA_DEV_TO_MEM)
 		addr = plat->dev_rx[cfg->src_dev_type];
-	else if (direction == DMA_TO_DEVICE)
+	else if (direction == DMA_MEM_TO_DEV)
 		addr = plat->dev_tx[cfg->dst_dev_type];
 
 	return addr;
@@ -1874,7 +1874,7 @@ d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction)
 static struct dma_async_tx_descriptor *
 d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src,
 	    struct scatterlist *sg_dst, unsigned int sg_len,
-	    enum dma_data_direction direction, unsigned long dma_flags)
+	    enum dma_transfer_direction direction, unsigned long dma_flags)
 {
 	struct d40_chan *chan = container_of(dchan, struct d40_chan, chan);
 	dma_addr_t src_dev_addr = 0;
@@ -1901,9 +1901,9 @@ d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src,
 	if (direction != DMA_NONE) {
 		dma_addr_t dev_addr = d40_get_dev_addr(chan, direction);
 
-		if (direction == DMA_FROM_DEVICE)
+		if (direction == DMA_DEV_TO_MEM)
 			src_dev_addr = dev_addr;
-		else if (direction == DMA_TO_DEVICE)
+		else if (direction == DMA_MEM_TO_DEV)
 			dst_dev_addr = dev_addr;
 	}
 
@@ -2107,10 +2107,10 @@ d40_prep_memcpy_sg(struct dma_chan *chan,
 static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
 							 struct scatterlist *sgl,
 							 unsigned int sg_len,
-							 enum dma_data_direction direction,
+							 enum dma_transfer_direction direction,
 							 unsigned long dma_flags)
 {
-	if (direction != DMA_FROM_DEVICE && direction != DMA_TO_DEVICE)
+	if (direction != DMA_DEV_TO_MEM && direction != DMA_MEM_TO_DEV)
 		return NULL;
 
 	return d40_prep_sg(chan, sgl, sgl, sg_len, direction, dma_flags);
@@ -2119,7 +2119,7 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
 static struct dma_async_tx_descriptor *
 dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 		     size_t buf_len, size_t period_len,
-		     enum dma_data_direction direction)
+		     enum dma_transfer_direction direction)
 {
 	unsigned int periods = buf_len / period_len;
 	struct dma_async_tx_descriptor *txd;
@@ -2268,7 +2268,7 @@ static int d40_set_runtime_config(struct dma_chan *chan,
 	dst_addr_width = config->dst_addr_width;
 	dst_maxburst = config->dst_maxburst;
 
-	if (config->direction == DMA_FROM_DEVICE) {
+	if (config->direction == DMA_DEV_TO_MEM) {
 		dma_addr_t dev_addr_rx =
 			d40c->base->plat_data->dev_rx[cfg->src_dev_type];
 
@@ -2291,7 +2291,7 @@ static int d40_set_runtime_config(struct dma_chan *chan,
 		if (dst_maxburst == 0)
 			dst_maxburst = src_maxburst;
 
-	} else if (config->direction == DMA_TO_DEVICE) {
+	} else if (config->direction == DMA_MEM_TO_DEV) {
 		dma_addr_t dev_addr_tx =
 			d40c->base->plat_data->dev_tx[cfg->dst_dev_type];
 
@@ -2356,7 +2356,7 @@ static int d40_set_runtime_config(struct dma_chan *chan,
 		"configured channel %s for %s, data width %d/%d, "
 		"maxburst %d/%d elements, LE, no flow control\n",
 		dma_chan_name(chan),
-		(config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
+		(config->direction == DMA_DEV_TO_MEM) ? "RX" : "TX",
 		src_addr_width, dst_addr_width,
 		src_maxburst, dst_maxburst);
 
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index a4a398f2ef61..8c880729b094 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -90,7 +90,7 @@ struct timb_dma_chan {
 	struct list_head	queue;
 	struct list_head	free_list;
 	unsigned int		bytes_per_line;
-	enum dma_data_direction	direction;
+	enum dma_transfer_direction	direction;
 	unsigned int		descs; /* Descriptors to allocate */
 	unsigned int		desc_elems; /* number of elems per descriptor */
 };
@@ -235,7 +235,7 @@ static void __td_start_dma(struct timb_dma_chan *td_chan)
 		"td_chan: %p, chan: %d, membase: %p\n",
 		td_chan, td_chan->chan.chan_id, td_chan->membase);
 
-	if (td_chan->direction == DMA_FROM_DEVICE) {
+	if (td_chan->direction == DMA_DEV_TO_MEM) {
 
 		/* descriptor address */
 		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_DHAR);
@@ -278,7 +278,7 @@ static void __td_finish(struct timb_dma_chan *td_chan)
 		txd->cookie);
 
 	/* make sure to stop the transfer */
-	if (td_chan->direction == DMA_FROM_DEVICE)
+	if (td_chan->direction == DMA_DEV_TO_MEM)
 		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_ER);
 /* Currently no support for stopping DMA transfers
 	else
@@ -398,7 +398,7 @@ static struct timb_dma_desc *td_alloc_init_desc(struct timb_dma_chan *td_chan)
 	td_desc->txd.flags = DMA_CTRL_ACK;
 
 	td_desc->txd.phys = dma_map_single(chan2dmadev(chan),
-		td_desc->desc_list, td_desc->desc_list_len, DMA_TO_DEVICE);
+		td_desc->desc_list, td_desc->desc_list_len, DMA_MEM_TO_DEV);
 
 	err = dma_mapping_error(chan2dmadev(chan), td_desc->txd.phys);
 	if (err) {
@@ -419,7 +419,7 @@ static void td_free_desc(struct timb_dma_desc *td_desc)
 {
 	dev_dbg(chan2dev(td_desc->txd.chan), "Freeing desc: %p\n", td_desc);
 	dma_unmap_single(chan2dmadev(td_desc->txd.chan), td_desc->txd.phys,
-		td_desc->desc_list_len, DMA_TO_DEVICE);
+		td_desc->desc_list_len, DMA_MEM_TO_DEV);
 
 	kfree(td_desc->desc_list);
 	kfree(td_desc);
@@ -558,7 +558,7 @@ static void td_issue_pending(struct dma_chan *chan)
 
 static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan,
 	struct scatterlist *sgl, unsigned int sg_len,
-	enum dma_data_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct timb_dma_chan *td_chan =
 		container_of(chan, struct timb_dma_chan, chan);
@@ -606,7 +606,7 @@ static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan,
 	}
 
 	dma_sync_single_for_device(chan2dmadev(chan), td_desc->txd.phys,
-		td_desc->desc_list_len, DMA_TO_DEVICE);
+		td_desc->desc_list_len, DMA_MEM_TO_DEV);
 
 	return &td_desc->txd;
 }
@@ -775,8 +775,8 @@ static int __devinit td_probe(struct platform_device *pdev)
 		td_chan->descs = pchan->descriptors;
 		td_chan->desc_elems = pchan->descriptor_elements;
 		td_chan->bytes_per_line = pchan->bytes_per_line;
-		td_chan->direction = pchan->rx ? DMA_FROM_DEVICE :
-			DMA_TO_DEVICE;
+		td_chan->direction = pchan->rx ? DMA_DEV_TO_MEM :
+			DMA_MEM_TO_DEV;
 
 		td_chan->membase = td->membase +
 			(i / 2) * TIMBDMA_INSTANCE_OFFSET +
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index cbd83e362b5e..6122c364cf11 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -845,7 +845,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 
 static struct dma_async_tx_descriptor *
 txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags)
 {
 	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
@@ -860,9 +860,9 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 	BUG_ON(!ds || !ds->reg_width);
 	if (ds->tx_reg)
-		BUG_ON(direction != DMA_TO_DEVICE);
+		BUG_ON(direction != DMA_MEM_TO_DEV);
 	else
-		BUG_ON(direction != DMA_FROM_DEVICE);
+		BUG_ON(direction != DMA_DEV_TO_MEM);
 	if (unlikely(!sg_len))
 		return NULL;
 
@@ -882,7 +882,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		mem = sg_dma_address(sg);
 
 		if (__is_dmac64(ddev)) {
-			if (direction == DMA_TO_DEVICE) {
+			if (direction == DMA_MEM_TO_DEV) {
 				desc->hwdesc.SAR = mem;
 				desc->hwdesc.DAR = ds->tx_reg;
 			} else {
@@ -891,7 +891,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			}
 			desc->hwdesc.CNTR = sg_dma_len(sg);
 		} else {
-			if (direction == DMA_TO_DEVICE) {
+			if (direction == DMA_MEM_TO_DEV) {
 				desc->hwdesc32.SAR = mem;
 				desc->hwdesc32.DAR = ds->tx_reg;
 			} else {
@@ -900,7 +900,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			}
 			desc->hwdesc32.CNTR = sg_dma_len(sg);
 		}
-		if (direction == DMA_TO_DEVICE) {
+		if (direction == DMA_MEM_TO_DEV) {
 			sai = ds->reg_width;
 			dai = 0;
 		} else {
diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 9eabffbc4e50..033f6aa670de 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -134,7 +134,7 @@ struct pl08x_txd {
 	struct dma_async_tx_descriptor tx;
 	struct list_head node;
 	struct list_head dsg_list;
-	enum dma_data_direction	direction;
+	enum dma_transfer_direction direction;
 	dma_addr_t llis_bus;
 	struct pl08x_lli *llis_va;
 	/* Default cctl value for LLIs */
@@ -197,7 +197,7 @@ struct pl08x_dma_chan {
 	dma_addr_t dst_addr;
 	u32 src_cctl;
 	u32 dst_cctl;
-	enum dma_data_direction	runtime_direction;
+	enum dma_transfer_direction runtime_direction;
 	dma_cookie_t lc;
 	struct list_head pend_list;
 	struct pl08x_txd *at;
diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h
index 4bfe0a2f7d50..f2c64f92c4a0 100644
--- a/include/linux/dw_dmac.h
+++ b/include/linux/dw_dmac.h
@@ -127,7 +127,7 @@ struct dw_cyclic_desc {
 
 struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
 		dma_addr_t buf_addr, size_t buf_len, size_t period_len,
-		enum dma_data_direction direction);
+		enum dma_transfer_direction direction);
 void dw_dma_cyclic_free(struct dma_chan *chan);
 int dw_dma_cyclic_start(struct dma_chan *chan);
 void dw_dma_cyclic_stop(struct dma_chan *chan);
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
index cb2dd118cc0f..62ef6938da10 100644
--- a/include/linux/sh_dma.h
+++ b/include/linux/sh_dma.h
@@ -30,7 +30,7 @@ struct sh_desc {
 	struct sh_dmae_regs hw;
 	struct list_head node;
 	struct dma_async_tx_descriptor async_tx;
-	enum dma_data_direction direction;
+	enum dma_transfer_direction direction;
 	dma_cookie_t cookie;
 	size_t partial;
 	int chunks;
-- 
cgit v1.2.3


From b60503ba432b16fc84442a84e29a7aad2c0c363d Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Thu, 20 Jan 2011 12:50:14 -0500
Subject: NVMe: New driver

This driver is for devices that follow the NVM Express standard

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 Documentation/ioctl/ioctl-number.txt |    1 +
 drivers/block/Kconfig                |   11 +
 drivers/block/Makefile               |    1 +
 drivers/block/nvme.c                 | 1043 ++++++++++++++++++++++++++++++++++
 include/linux/nvme.h                 |  343 +++++++++++
 5 files changed, 1399 insertions(+)
 create mode 100644 drivers/block/nvme.c
 create mode 100644 include/linux/nvme.h

(limited to 'include/linux')

diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 54078ed96b37..4840334ea97b 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -149,6 +149,7 @@ Code  Seq#(hex)	Include File		Comments
 'M'	01-03	drivers/scsi/megaraid/megaraid_sas.h
 'M'	00-0F	drivers/video/fsl-diu-fb.h	conflict!
 'N'	00-1F	drivers/usb/scanner.h
+'N'	40-7F	drivers/block/nvme.c
 'O'     00-06   mtd/ubi-user.h		UBI
 'P'	all	linux/soundcard.h	conflict!
 'P'	60-6F	sound/sscape_ioctl.h	conflict!
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 6f07ec1c2f58..35e56e1c948f 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -315,6 +315,17 @@ config BLK_DEV_NBD
 
 	  If unsure, say N.
 
+config BLK_DEV_NVME
+	tristate "NVM Express block device"
+	depends on PCI
+	---help---
+	  The NVM Express driver is for solid state drives directly
+	  connected to the PCI or PCI Express bus.  If you know you
+	  don't have one of these, it is safe to answer N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called nvme.
+
 config BLK_DEV_OSD
 	tristate "OSD object-as-blkdev support"
 	depends on SCSI_OSD_ULD
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 76646e9a1c91..349539ad3ad9 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_XILINX_SYSACE)	+= xsysace.o
 obj-$(CONFIG_CDROM_PKTCDVD)	+= pktcdvd.o
 obj-$(CONFIG_MG_DISK)		+= mg_disk.o
 obj-$(CONFIG_SUNVDC)		+= sunvdc.o
+obj-$(CONFIG_BLK_DEV_NVME)	+= nvme.o
 obj-$(CONFIG_BLK_DEV_OSD)	+= osdblk.o
 
 obj-$(CONFIG_BLK_DEV_UMEM)	+= umem.o
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
new file mode 100644
index 000000000000..ef66eccc2aa2
--- /dev/null
+++ b/drivers/block/nvme.c
@@ -0,0 +1,1043 @@
+/*
+ * NVM Express device driver
+ * Copyright (c) 2011, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/nvme.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kdev_t.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+
+#define NVME_Q_DEPTH 1024
+#define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
+#define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
+#define NVME_MINORS 64
+
+static int nvme_major;
+module_param(nvme_major, int, 0);
+
+/*
+ * Represents an NVM Express device.  Each nvme_dev is a PCI function.
+ */
+struct nvme_dev {
+	struct list_head node;
+	struct nvme_queue **queues;
+	u32 __iomem *dbs;
+	struct pci_dev *pci_dev;
+	int instance;
+	int queue_count;
+	u32 ctrl_config;
+	struct msix_entry *entry;
+	struct nvme_bar __iomem *bar;
+	struct list_head namespaces;
+};
+
+/*
+ * An NVM Express namespace is equivalent to a SCSI LUN
+ */
+struct nvme_ns {
+	struct list_head list;
+
+	struct nvme_dev *dev;
+	struct request_queue *queue;
+	struct gendisk *disk;
+
+	int ns_id;
+	int lba_shift;
+};
+
+/*
+ * An NVM Express queue.  Each device has at least two (one for admin
+ * commands and one for I/O commands).
+ */
+struct nvme_queue {
+	struct device *q_dmadev;
+	spinlock_t q_lock;
+	struct nvme_command *sq_cmds;
+	volatile struct nvme_completion *cqes;
+	dma_addr_t sq_dma_addr;
+	dma_addr_t cq_dma_addr;
+	wait_queue_head_t sq_full;
+	struct bio_list sq_cong;
+	u32 __iomem *q_db;
+	u16 q_depth;
+	u16 cq_vector;
+	u16 sq_head;
+	u16 sq_tail;
+	u16 cq_head;
+	u16 cq_cycle;
+	unsigned long cmdid_data[];
+};
+
+/*
+ * Check we didin't inadvertently grow the command struct
+ */
+static inline void _nvme_check_size(void)
+{
+	BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
+	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
+}
+
+/**
+ * alloc_cmdid - Allocate a Command ID
+ * @param nvmeq The queue that will be used for this command
+ * @param ctx A pointer that will be passed to the handler
+ * @param handler The ID of the handler to call
+ *
+ * Allocate a Command ID for a queue.  The data passed in will
+ * be passed to the completion handler.  This is implemented by using
+ * the bottom two bits of the ctx pointer to store the handler ID.
+ * Passing in a pointer that's not 4-byte aligned will cause a BUG.
+ * We can change this if it becomes a problem.
+ */
+static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx, int handler)
+{
+	int depth = nvmeq->q_depth;
+	unsigned long data = (unsigned long)ctx | handler;
+	int cmdid;
+
+	BUG_ON((unsigned long)ctx & 3);
+
+	do {
+		cmdid = find_first_zero_bit(nvmeq->cmdid_data, depth);
+		if (cmdid >= depth)
+			return -EBUSY;
+	} while (test_and_set_bit(cmdid, nvmeq->cmdid_data));
+
+	nvmeq->cmdid_data[cmdid + BITS_TO_LONGS(depth)] = data;
+	return cmdid;
+}
+
+static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
+								int handler)
+{
+	int cmdid;
+	wait_event_killable(nvmeq->sq_full,
+			(cmdid = alloc_cmdid(nvmeq, ctx, handler)) >= 0);
+	return (cmdid < 0) ? -EINTR : cmdid;
+}
+
+/* If you need more than four handlers, you'll need to change how
+ * alloc_cmdid and nvme_process_cq work
+ */
+enum {
+	sync_completion_id = 0,
+	bio_completion_id,
+};
+
+static unsigned long free_cmdid(struct nvme_queue *nvmeq, int cmdid)
+{
+	unsigned long data;
+
+	data = nvmeq->cmdid_data[cmdid + BITS_TO_LONGS(nvmeq->q_depth)];
+	clear_bit(cmdid, nvmeq->cmdid_data);
+	wake_up(&nvmeq->sq_full);
+	return data;
+}
+
+static struct nvme_queue *get_nvmeq(struct nvme_ns *ns)
+{
+	return ns->dev->queues[1];
+}
+
+static void put_nvmeq(struct nvme_queue *nvmeq)
+{
+}
+
+/**
+ * nvme_submit_cmd: Copy a command into a queue and ring the doorbell
+ * @nvmeq: The queue to use
+ * @cmd: The command to send
+ *
+ * Safe to use from interrupt context
+ */
+static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+{
+	unsigned long flags;
+	u16 tail;
+	/* XXX: Need to check tail isn't going to overrun head */
+	spin_lock_irqsave(&nvmeq->q_lock, flags);
+	tail = nvmeq->sq_tail;
+	memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+	writel(tail, nvmeq->q_db);
+	if (++tail == nvmeq->q_depth)
+		tail = 0;
+	nvmeq->sq_tail = tail;
+	spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+
+	return 0;
+}
+
+struct nvme_req_info {
+	struct bio *bio;
+	int nents;
+	struct scatterlist sg[0];
+};
+
+/* XXX: use a mempool */
+static struct nvme_req_info *alloc_info(unsigned nseg, gfp_t gfp)
+{
+	return kmalloc(sizeof(struct nvme_req_info) +
+			sizeof(struct scatterlist) * nseg, gfp);
+}
+
+static void free_info(struct nvme_req_info *info)
+{
+	kfree(info);
+}
+
+static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
+						struct nvme_completion *cqe)
+{
+	struct nvme_req_info *info = ctx;
+	struct bio *bio = info->bio;
+	u16 status = le16_to_cpup(&cqe->status) >> 1;
+
+	dma_unmap_sg(nvmeq->q_dmadev, info->sg, info->nents,
+			bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+	free_info(info);
+	bio_endio(bio, status ? -EIO : 0);
+}
+
+static int nvme_map_bio(struct device *dev, struct nvme_req_info *info,
+		struct bio *bio, enum dma_data_direction dma_dir, int psegs)
+{
+	struct bio_vec *bvec;
+	struct scatterlist *sg = info->sg;
+	int i, nsegs;
+
+	sg_init_table(sg, psegs);
+	bio_for_each_segment(bvec, bio, i) {
+		sg_set_page(sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
+		/* XXX: handle non-mergable here */
+		nsegs++;
+	}
+	info->nents = nsegs;
+
+	return dma_map_sg(dev, info->sg, info->nents, dma_dir);
+}
+
+static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
+								struct bio *bio)
+{
+	struct nvme_rw_command *cmnd;
+	struct nvme_req_info *info;
+	enum dma_data_direction dma_dir;
+	int cmdid;
+	u16 control;
+	u32 dsmgmt;
+	unsigned long flags;
+	int psegs = bio_phys_segments(ns->queue, bio);
+
+	info = alloc_info(psegs, GFP_NOIO);
+	if (!info)
+		goto congestion;
+	info->bio = bio;
+
+	cmdid = alloc_cmdid(nvmeq, info, bio_completion_id);
+	if (unlikely(cmdid < 0))
+		goto free_info;
+
+	control = 0;
+	if (bio->bi_rw & REQ_FUA)
+		control |= NVME_RW_FUA;
+	if (bio->bi_rw & (REQ_FAILFAST_DEV | REQ_RAHEAD))
+		control |= NVME_RW_LR;
+
+	dsmgmt = 0;
+	if (bio->bi_rw & REQ_RAHEAD)
+		dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
+
+	spin_lock_irqsave(&nvmeq->q_lock, flags);
+	cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail].rw;
+
+	if (bio_data_dir(bio)) {
+		cmnd->opcode = nvme_cmd_write;
+		dma_dir = DMA_TO_DEVICE;
+	} else {
+		cmnd->opcode = nvme_cmd_read;
+		dma_dir = DMA_FROM_DEVICE;
+	}
+
+	nvme_map_bio(nvmeq->q_dmadev, info, bio, dma_dir, psegs);
+
+	cmnd->flags = 1;
+	cmnd->command_id = cmdid;
+	cmnd->nsid = cpu_to_le32(ns->ns_id);
+	cmnd->prp1 = cpu_to_le64(sg_phys(info->sg));
+	/* XXX: Support more than one PRP */
+	cmnd->slba = cpu_to_le64(bio->bi_sector >> (ns->lba_shift - 9));
+	cmnd->length = cpu_to_le16((bio->bi_size >> ns->lba_shift) - 1);
+	cmnd->control = cpu_to_le16(control);
+	cmnd->dsmgmt = cpu_to_le32(dsmgmt);
+
+	writel(nvmeq->sq_tail, nvmeq->q_db);
+	if (++nvmeq->sq_tail == nvmeq->q_depth)
+		nvmeq->sq_tail = 0;
+
+	spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+
+	return 0;
+
+ free_info:
+	free_info(info);
+ congestion:
+	return -EBUSY;
+}
+
+/*
+ * NB: return value of non-zero would mean that we were a stacking driver.
+ * make_request must always succeed.
+ */
+static int nvme_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct nvme_ns *ns = q->queuedata;
+	struct nvme_queue *nvmeq = get_nvmeq(ns);
+
+	if (nvme_submit_bio_queue(nvmeq, ns, bio)) {
+		blk_set_queue_congested(q, rw_is_sync(bio->bi_rw));
+		bio_list_add(&nvmeq->sq_cong, bio);
+	}
+	put_nvmeq(nvmeq);
+
+	return 0;
+}
+
+struct sync_cmd_info {
+	struct task_struct *task;
+	u32 result;
+	int status;
+};
+
+static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
+						struct nvme_completion *cqe)
+{
+	struct sync_cmd_info *cmdinfo = ctx;
+	cmdinfo->result = le32_to_cpup(&cqe->result);
+	cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
+	wake_up_process(cmdinfo->task);
+}
+
+typedef void (*completion_fn)(struct nvme_queue *, void *,
+						struct nvme_completion *);
+
+static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
+{
+	u16 head, cycle;
+
+	static const completion_fn completions[4] = {
+		[sync_completion_id] = sync_completion,
+		[bio_completion_id]  = bio_completion,
+	};
+
+	head = nvmeq->cq_head;
+	cycle = nvmeq->cq_cycle;
+
+	for (;;) {
+		unsigned long data;
+		void *ptr;
+		unsigned char handler;
+		struct nvme_completion cqe = nvmeq->cqes[head];
+		if ((le16_to_cpu(cqe.status) & 1) != cycle)
+			break;
+		nvmeq->sq_head = le16_to_cpu(cqe.sq_head);
+		if (++head == nvmeq->q_depth) {
+			head = 0;
+			cycle = !cycle;
+		}
+
+		data = free_cmdid(nvmeq, cqe.command_id);
+		handler = data & 3;
+		ptr = (void *)(data & ~3UL);
+		completions[handler](nvmeq, ptr, &cqe);
+	}
+
+	/* If the controller ignores the cq head doorbell and continuously
+	 * writes to the queue, it is theoretically possible to wrap around
+	 * the queue twice and mistakenly return IRQ_NONE.  Linux only
+	 * requires that 0.1% of your interrupts are handled, so this isn't
+	 * a big problem.
+	 */
+	if (head == nvmeq->cq_head && cycle == nvmeq->cq_cycle)
+		return IRQ_NONE;
+
+	writel(head, nvmeq->q_db + 1);
+	nvmeq->cq_head = head;
+	nvmeq->cq_cycle = cycle;
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t nvme_irq(int irq, void *data)
+{
+	return nvme_process_cq(data);
+}
+
+/*
+ * Returns 0 on success.  If the result is negative, it's a Linux error code;
+ * if the result is positive, it's an NVM Express status code
+ */
+static int nvme_submit_sync_cmd(struct nvme_queue *q, struct nvme_command *cmd,
+								u32 *result)
+{
+	int cmdid;
+	struct sync_cmd_info cmdinfo;
+
+	cmdinfo.task = current;
+	cmdinfo.status = -EINTR;
+
+	cmdid = alloc_cmdid_killable(q, &cmdinfo, sync_completion_id);
+	if (cmdid < 0)
+		return cmdid;
+	cmd->common.command_id = cmdid;
+
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	nvme_submit_cmd(q, cmd);
+	schedule();
+
+	if (result)
+		*result = cmdinfo.result;
+
+	return cmdinfo.status;
+}
+
+static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
+								u32 *result)
+{
+	return nvme_submit_sync_cmd(dev->queues[0], cmd, result);
+}
+
+static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
+{
+	int status;
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	c.delete_queue.opcode = opcode;
+	c.delete_queue.qid = cpu_to_le16(id);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+	if (status)
+		return -EIO;
+	return 0;
+}
+
+static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
+						struct nvme_queue *nvmeq)
+{
+	int status;
+	struct nvme_command c;
+	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
+
+	memset(&c, 0, sizeof(c));
+	c.create_cq.opcode = nvme_admin_create_cq;
+	c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
+	c.create_cq.cqid = cpu_to_le16(qid);
+	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
+	c.create_cq.cq_flags = cpu_to_le16(flags);
+	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+	if (status)
+		return -EIO;
+	return 0;
+}
+
+static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
+						struct nvme_queue *nvmeq)
+{
+	int status;
+	struct nvme_command c;
+	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
+
+	memset(&c, 0, sizeof(c));
+	c.create_sq.opcode = nvme_admin_create_sq;
+	c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
+	c.create_sq.sqid = cpu_to_le16(qid);
+	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
+	c.create_sq.sq_flags = cpu_to_le16(flags);
+	c.create_sq.cqid = cpu_to_le16(qid);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+	if (status)
+		return -EIO;
+	return 0;
+}
+
+static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
+{
+	return adapter_delete_queue(dev, nvme_admin_delete_cq, cqid);
+}
+
+static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
+{
+	return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
+}
+
+static void nvme_free_queue(struct nvme_dev *dev, int qid)
+{
+	struct nvme_queue *nvmeq = dev->queues[qid];
+
+	free_irq(dev->entry[nvmeq->cq_vector].vector, nvmeq);
+
+	/* Don't tell the adapter to delete the admin queue */
+	if (qid) {
+		adapter_delete_sq(dev, qid);
+		adapter_delete_cq(dev, qid);
+	}
+
+	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
+				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
+	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+	kfree(nvmeq);
+}
+
+static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
+							int depth, int vector)
+{
+	struct device *dmadev = &dev->pci_dev->dev;
+	unsigned extra = (depth + BITS_TO_LONGS(depth)) * sizeof(long);
+	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
+	if (!nvmeq)
+		return NULL;
+
+	nvmeq->cqes = dma_alloc_coherent(dmadev, CQ_SIZE(depth),
+					&nvmeq->cq_dma_addr, GFP_KERNEL);
+	if (!nvmeq->cqes)
+		goto free_nvmeq;
+	memset((void *)nvmeq->cqes, 0, CQ_SIZE(depth));
+
+	nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth),
+					&nvmeq->sq_dma_addr, GFP_KERNEL);
+	if (!nvmeq->sq_cmds)
+		goto free_cqdma;
+
+	nvmeq->q_dmadev = dmadev;
+	spin_lock_init(&nvmeq->q_lock);
+	nvmeq->cq_head = 0;
+	nvmeq->cq_cycle = 1;
+	init_waitqueue_head(&nvmeq->sq_full);
+	bio_list_init(&nvmeq->sq_cong);
+	nvmeq->q_db = &dev->dbs[qid * 2];
+	nvmeq->q_depth = depth;
+	nvmeq->cq_vector = vector;
+
+	return nvmeq;
+
+ free_cqdma:
+	dma_free_coherent(dmadev, CQ_SIZE(nvmeq->q_depth), (void *)nvmeq->cqes,
+							nvmeq->cq_dma_addr);
+ free_nvmeq:
+	kfree(nvmeq);
+	return NULL;
+}
+
+static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev,
+					int qid, int cq_size, int vector)
+{
+	int result;
+	struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector);
+
+	result = adapter_alloc_cq(dev, qid, nvmeq);
+	if (result < 0)
+		goto free_nvmeq;
+
+	result = adapter_alloc_sq(dev, qid, nvmeq);
+	if (result < 0)
+		goto release_cq;
+
+	result = request_irq(dev->entry[vector].vector, nvme_irq,
+				IRQF_DISABLED | IRQF_SHARED, "nvme", nvmeq);
+	if (result < 0)
+		goto release_sq;
+
+	return nvmeq;
+
+ release_sq:
+	adapter_delete_sq(dev, qid);
+ release_cq:
+	adapter_delete_cq(dev, qid);
+ free_nvmeq:
+	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
+				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
+	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+	kfree(nvmeq);
+	return NULL;
+}
+
+static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
+{
+	int result;
+	u32 aqa;
+	struct nvme_queue *nvmeq;
+
+	dev->dbs = ((void __iomem *)dev->bar) + 4096;
+
+	nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
+
+	aqa = nvmeq->q_depth - 1;
+	aqa |= aqa << 16;
+
+	dev->ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM;
+	dev->ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
+	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+
+	writel(aqa, &dev->bar->aqa);
+	writeq(nvmeq->sq_dma_addr, &dev->bar->asq);
+	writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
+	writel(dev->ctrl_config, &dev->bar->cc);
+
+	while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
+		msleep(100);
+		if (fatal_signal_pending(current))
+			return -EINTR;
+	}
+
+	result = request_irq(dev->entry[0].vector, nvme_irq,
+			IRQF_DISABLED | IRQF_SHARED, "nvme admin", nvmeq);
+	dev->queues[0] = nvmeq;
+	return result;
+}
+
+static int nvme_identify(struct nvme_ns *ns, void __user *addr, int cns)
+{
+	struct nvme_dev *dev = ns->dev;
+	int status;
+	struct nvme_command c;
+	void *page;
+	dma_addr_t dma_addr;
+
+	page = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
+								GFP_KERNEL);
+
+	memset(&c, 0, sizeof(c));
+	c.identify.opcode = nvme_admin_identify;
+	c.identify.nsid = cns ? 0 : cpu_to_le32(ns->ns_id);
+	c.identify.prp1 = cpu_to_le64(dma_addr);
+	c.identify.cns = cpu_to_le32(cns);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+
+	if (status)
+		status = -EIO;
+	else if (copy_to_user(addr, page, 4096))
+		status = -EFAULT;
+
+	dma_free_coherent(&dev->pci_dev->dev, 4096, page, dma_addr);
+
+	return status;
+}
+
+static int nvme_get_range_type(struct nvme_ns *ns, void __user *addr)
+{
+	struct nvme_dev *dev = ns->dev;
+	int status;
+	struct nvme_command c;
+	void *page;
+	dma_addr_t dma_addr;
+
+	page = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
+								GFP_KERNEL);
+
+	memset(&c, 0, sizeof(c));
+	c.features.opcode = nvme_admin_get_features;
+	c.features.nsid = cpu_to_le32(ns->ns_id);
+	c.features.prp1 = cpu_to_le64(dma_addr);
+	c.features.fid = cpu_to_le32(NVME_FEAT_LBA_RANGE);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+
+	/* XXX: Assuming first range for now */
+	if (status)
+		status = -EIO;
+	else if (copy_to_user(addr, page, 64))
+		status = -EFAULT;
+
+	dma_free_coherent(&dev->pci_dev->dev, 4096, page, dma_addr);
+
+	return status;
+}
+
+static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
+							unsigned long arg)
+{
+	struct nvme_ns *ns = bdev->bd_disk->private_data;
+
+	switch (cmd) {
+	case NVME_IOCTL_IDENTIFY_NS:
+		return nvme_identify(ns, (void __user *)arg, 0);
+	case NVME_IOCTL_IDENTIFY_CTRL:
+		return nvme_identify(ns, (void __user *)arg, 1);
+	case NVME_IOCTL_GET_RANGE_TYPE:
+		return nvme_get_range_type(ns, (void __user *)arg);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct block_device_operations nvme_fops = {
+	.owner		= THIS_MODULE,
+	.ioctl		= nvme_ioctl,
+};
+
+static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int index,
+			struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
+{
+	struct nvme_ns *ns;
+	struct gendisk *disk;
+	int lbaf;
+
+	if (rt->attributes & NVME_LBART_ATTRIB_HIDE)
+		return NULL;
+
+	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+	if (!ns)
+		return NULL;
+	ns->queue = blk_alloc_queue(GFP_KERNEL);
+	if (!ns->queue)
+		goto out_free_ns;
+	ns->queue->queue_flags = QUEUE_FLAG_DEFAULT | QUEUE_FLAG_NOMERGES |
+				QUEUE_FLAG_NONROT | QUEUE_FLAG_DISCARD;
+	blk_queue_make_request(ns->queue, nvme_make_request);
+	ns->dev = dev;
+	ns->queue->queuedata = ns;
+
+	disk = alloc_disk(NVME_MINORS);
+	if (!disk)
+		goto out_free_queue;
+	ns->ns_id = index;
+	ns->disk = disk;
+	lbaf = id->flbas & 0xf;
+	ns->lba_shift = id->lbaf[lbaf].ds;
+
+	disk->major = nvme_major;
+	disk->minors = NVME_MINORS;
+	disk->first_minor = NVME_MINORS * index;
+	disk->fops = &nvme_fops;
+	disk->private_data = ns;
+	disk->queue = ns->queue;
+	sprintf(disk->disk_name, "nvme%dn%d", dev->instance, index);
+	set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
+
+	return ns;
+
+ out_free_queue:
+	blk_cleanup_queue(ns->queue);
+ out_free_ns:
+	kfree(ns);
+	return NULL;
+}
+
+static void nvme_ns_free(struct nvme_ns *ns)
+{
+	put_disk(ns->disk);
+	blk_cleanup_queue(ns->queue);
+	kfree(ns);
+}
+
+static int set_queue_count(struct nvme_dev *dev, int sq_count, int cq_count)
+{
+	int status;
+	u32 result;
+	struct nvme_command c;
+	u32 q_count = (sq_count - 1) | ((cq_count - 1) << 16);
+
+	memset(&c, 0, sizeof(c));
+	c.features.opcode = nvme_admin_get_features;
+	c.features.fid = cpu_to_le32(NVME_FEAT_NUM_QUEUES);
+	c.features.dword11 = cpu_to_le32(q_count);
+
+	status = nvme_submit_admin_cmd(dev, &c, &result);
+	if (status)
+		return -EIO;
+	return min(result & 0xffff, result >> 16) + 1;
+}
+
+/* XXX: Create per-CPU queues */
+static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
+{
+	int this_cpu;
+
+	set_queue_count(dev, 1, 1);
+
+	this_cpu = get_cpu();
+	dev->queues[1] = nvme_create_queue(dev, 1, NVME_Q_DEPTH, this_cpu);
+	put_cpu();
+	if (!dev->queues[1])
+		return -ENOMEM;
+	dev->queue_count++;
+
+	return 0;
+}
+
+static void nvme_free_queues(struct nvme_dev *dev)
+{
+	int i;
+
+	for (i = dev->queue_count - 1; i >= 0; i--)
+		nvme_free_queue(dev, i);
+}
+
+static int __devinit nvme_dev_add(struct nvme_dev *dev)
+{
+	int res, nn, i;
+	struct nvme_ns *ns, *next;
+	void *id;
+	dma_addr_t dma_addr;
+	struct nvme_command cid, crt;
+
+	res = nvme_setup_io_queues(dev);
+	if (res)
+		return res;
+
+	/* XXX: Switch to a SG list once prp2 works */
+	id = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr,
+								GFP_KERNEL);
+
+	memset(&cid, 0, sizeof(cid));
+	cid.identify.opcode = nvme_admin_identify;
+	cid.identify.nsid = 0;
+	cid.identify.prp1 = cpu_to_le64(dma_addr);
+	cid.identify.cns = cpu_to_le32(1);
+
+	res = nvme_submit_admin_cmd(dev, &cid, NULL);
+	if (res) {
+		res = -EIO;
+		goto out_free;
+	}
+
+	nn = le32_to_cpup(&((struct nvme_id_ctrl *)id)->nn);
+
+	cid.identify.cns = 0;
+	memset(&crt, 0, sizeof(crt));
+	crt.features.opcode = nvme_admin_get_features;
+	crt.features.prp1 = cpu_to_le64(dma_addr + 4096);
+	crt.features.fid = cpu_to_le32(NVME_FEAT_LBA_RANGE);
+
+	for (i = 0; i < nn; i++) {
+		cid.identify.nsid = cpu_to_le32(i);
+		res = nvme_submit_admin_cmd(dev, &cid, NULL);
+		if (res)
+			continue;
+
+		if (((struct nvme_id_ns *)id)->ncap == 0)
+			continue;
+
+		crt.features.nsid = cpu_to_le32(i);
+		res = nvme_submit_admin_cmd(dev, &crt, NULL);
+		if (res)
+			continue;
+
+		ns = nvme_alloc_ns(dev, i, id, id + 4096);
+		if (ns)
+			list_add_tail(&ns->list, &dev->namespaces);
+	}
+	list_for_each_entry(ns, &dev->namespaces, list)
+		add_disk(ns->disk);
+
+	dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr);
+	return 0;
+
+ out_free:
+	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
+		list_del(&ns->list);
+		nvme_ns_free(ns);
+	}
+
+	dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr);
+	return res;
+}
+
+static int nvme_dev_remove(struct nvme_dev *dev)
+{
+	struct nvme_ns *ns, *next;
+
+	/* TODO: wait all I/O finished or cancel them */
+
+	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
+		list_del(&ns->list);
+		del_gendisk(ns->disk);
+		nvme_ns_free(ns);
+	}
+
+	nvme_free_queues(dev);
+
+	return 0;
+}
+
+/* XXX: Use an ida or something to let remove / add work correctly */
+static void nvme_set_instance(struct nvme_dev *dev)
+{
+	static int instance;
+	dev->instance = instance++;
+}
+
+static void nvme_release_instance(struct nvme_dev *dev)
+{
+}
+
+static int __devinit nvme_probe(struct pci_dev *pdev,
+						const struct pci_device_id *id)
+{
+	int result = -ENOMEM;
+	struct nvme_dev *dev;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+	dev->entry = kcalloc(num_possible_cpus(), sizeof(*dev->entry),
+								GFP_KERNEL);
+	if (!dev->entry)
+		goto free;
+	dev->queues = kcalloc(2, sizeof(void *), GFP_KERNEL);
+	if (!dev->queues)
+		goto free;
+
+	INIT_LIST_HEAD(&dev->namespaces);
+	dev->pci_dev = pdev;
+	pci_set_drvdata(pdev, dev);
+	dma_set_mask(&dev->pci_dev->dev, DMA_BIT_MASK(64));
+	nvme_set_instance(dev);
+
+	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
+	if (!dev->bar) {
+		result = -ENOMEM;
+		goto disable;
+	}
+
+	result = nvme_configure_admin_queue(dev);
+	if (result)
+		goto unmap;
+	dev->queue_count++;
+
+	result = nvme_dev_add(dev);
+	if (result)
+		goto delete;
+	return 0;
+
+ delete:
+	nvme_free_queues(dev);
+ unmap:
+	iounmap(dev->bar);
+ disable:
+	pci_disable_msix(pdev);
+	nvme_release_instance(dev);
+ free:
+	kfree(dev->queues);
+	kfree(dev->entry);
+	kfree(dev);
+	return result;
+}
+
+static void __devexit nvme_remove(struct pci_dev *pdev)
+{
+	struct nvme_dev *dev = pci_get_drvdata(pdev);
+	nvme_dev_remove(dev);
+	pci_disable_msix(pdev);
+	iounmap(dev->bar);
+	nvme_release_instance(dev);
+	kfree(dev->queues);
+	kfree(dev->entry);
+	kfree(dev);
+}
+
+/* These functions are yet to be implemented */
+#define nvme_error_detected NULL
+#define nvme_dump_registers NULL
+#define nvme_link_reset NULL
+#define nvme_slot_reset NULL
+#define nvme_error_resume NULL
+#define nvme_suspend NULL
+#define nvme_resume NULL
+
+static struct pci_error_handlers nvme_err_handler = {
+	.error_detected	= nvme_error_detected,
+	.mmio_enabled	= nvme_dump_registers,
+	.link_reset	= nvme_link_reset,
+	.slot_reset	= nvme_slot_reset,
+	.resume		= nvme_error_resume,
+};
+
+/* Move to pci_ids.h later */
+#define PCI_CLASS_STORAGE_EXPRESS	0x010802
+
+static DEFINE_PCI_DEVICE_TABLE(nvme_id_table) = {
+	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, nvme_id_table);
+
+static struct pci_driver nvme_driver = {
+	.name		= "nvme",
+	.id_table	= nvme_id_table,
+	.probe		= nvme_probe,
+	.remove		= __devexit_p(nvme_remove),
+	.suspend	= nvme_suspend,
+	.resume		= nvme_resume,
+	.err_handler	= &nvme_err_handler,
+};
+
+static int __init nvme_init(void)
+{
+	int result;
+
+	nvme_major = register_blkdev(nvme_major, "nvme");
+	if (nvme_major <= 0)
+		return -EBUSY;
+
+	result = pci_register_driver(&nvme_driver);
+	if (!result)
+		return 0;
+
+	unregister_blkdev(nvme_major, "nvme");
+	return result;
+}
+
+static void __exit nvme_exit(void)
+{
+	pci_unregister_driver(&nvme_driver);
+	unregister_blkdev(nvme_major, "nvme");
+}
+
+MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("0.1");
+module_init(nvme_init);
+module_exit(nvme_exit);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
new file mode 100644
index 000000000000..9ba53584f722
--- /dev/null
+++ b/include/linux/nvme.h
@@ -0,0 +1,343 @@
+/*
+ * Definitions for the NVM Express interface
+ * Copyright (c) 2011, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _LINUX_NVME_H
+#define _LINUX_NVME_H
+
+#include <linux/types.h>
+
+struct nvme_bar {
+	__u64			cap;	/* Controller Capabilities */
+	__u32			vs;	/* Version */
+	__u32			ims;	/* Interrupt Mask Set */
+	__u32			imc;	/* Interrupt Mask Clear */
+	__u32			cc;	/* Controller Configuration */
+	__u32			csts;	/* Controller Status */
+	__u32			aqa;	/* Admin Queue Attributes */
+	__u64			asq;	/* Admin SQ Base Address */
+	__u64			acq;	/* Admin CQ Base Address */
+};
+
+enum {
+	NVME_CC_ENABLE		= 1 << 0,
+	NVME_CC_CSS_NVM		= 0 << 4,
+	NVME_CC_MPS_SHIFT	= 7,
+	NVME_CC_ARB_RR		= 0 << 11,
+	NVME_CC_ARB_WRRU	= 1 << 11,
+	NVME_CC_ARB_VS		= 3 << 11,
+	NVME_CC_SHN_NONE	= 0 << 13,
+	NVME_CC_SHN_NORMAL	= 1 << 13,
+	NVME_CC_SHN_ABRUPT	= 2 << 13,
+	NVME_CSTS_RDY		= 1 << 0,
+	NVME_CSTS_CFS		= 1 << 1,
+	NVME_CSTS_SHST_NORMAL	= 0 << 2,
+	NVME_CSTS_SHST_OCCUR	= 1 << 2,
+	NVME_CSTS_SHST_CMPLT	= 2 << 2,
+};
+
+#define NVME_VS(major, minor)	(major << 16 | minor)
+
+struct nvme_id_ctrl {
+	__le16			vid;
+	__le16			ssvid;
+	char			sn[20];
+	char			mn[40];
+	char			fr[8];
+	__le32			nn;
+	__u8			rab;
+	__u8			rsvd77[178];
+	__le16			oacs;
+	__u8			acl;
+	__u8			aerl;
+	__u8			frmw;
+	__u8			lpa;
+	__u8			elpe;
+	__u8			npss;
+	__u8			rsvd264[248];
+	__le64			psd[32];
+	__le16			oncs;
+	__le16			fuses;
+	__u8			fna;
+	__u8			vwc;
+	__le16			awun;
+	__le16			awupf;
+	__u8			rsvd778[246];
+	__u8			cmdset[2048];
+	__u8			vs[1024];
+};
+
+struct nvme_lbaf {
+	__le16			ms;
+	__u8			ds;
+	__u8			rp;
+};
+
+struct nvme_id_ns {
+	__le64			nsze;
+	__le64			ncap;
+	__le64			nuse;
+	__u8			nsfeat;
+	__u8			nlbaf;
+	__u8			flbas;
+	__u8			mc;
+	__u8			dpc;
+	__u8			dps;
+	__u8			rsvd30[98];
+	struct nvme_lbaf	lbaf[16];
+	__u8			rsvd192[192];
+	__u8			vs[3712];
+};
+
+enum {
+	NVME_NS_FEAT_THIN	= 1 << 0,
+	NVME_LBAF_RP_BEST	= 0,
+	NVME_LBAF_RP_BETTER	= 1,
+	NVME_LBAF_RP_GOOD	= 2,
+	NVME_LBAF_RP_DEGRADED	= 3,
+};
+
+struct nvme_lba_range_type {
+	__u8			type;
+	__u8			attributes;
+	__u8			rsvd2[14];
+	__u64			slba;
+	__u64			nlb;
+	__u8			guid[16];
+	__u8			rsvd48[16];
+};
+
+enum {
+	NVME_LBART_TYPE_FS	= 0x01,
+	NVME_LBART_TYPE_RAID	= 0x02,
+	NVME_LBART_TYPE_CACHE	= 0x03,
+	NVME_LBART_TYPE_SWAP	= 0x04,
+
+	NVME_LBART_ATTRIB_TEMP	= 1 << 0,
+	NVME_LBART_ATTRIB_HIDE	= 1 << 1,
+};
+
+/* I/O commands */
+
+enum nvme_opcode {
+	nvme_cmd_flush		= 0x00,
+	nvme_cmd_write		= 0x01,
+	nvme_cmd_read		= 0x02,
+	nvme_cmd_write_uncor	= 0x04,
+	nvme_cmd_compare	= 0x05,
+	nvme_cmd_dsm		= 0x09,
+};
+
+struct nvme_rw_command {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2;
+	__le64			metadata;
+	__le64			prp1;
+	__le64			prp2;
+	__le64			slba;
+	__le16			length;
+	__le16			control;
+	__le32			dsmgmt;
+	__le32			reftag;
+	__le16			apptag;
+	__le16			appmask;
+};
+
+enum {
+	NVME_RW_LR			= 1 << 15,
+	NVME_RW_FUA			= 1 << 14,
+	NVME_RW_DSM_FREQ_UNSPEC		= 0,
+	NVME_RW_DSM_FREQ_TYPICAL	= 1,
+	NVME_RW_DSM_FREQ_RARE		= 2,
+	NVME_RW_DSM_FREQ_READS		= 3,
+	NVME_RW_DSM_FREQ_WRITES		= 4,
+	NVME_RW_DSM_FREQ_RW		= 5,
+	NVME_RW_DSM_FREQ_ONCE		= 6,
+	NVME_RW_DSM_FREQ_PREFETCH	= 7,
+	NVME_RW_DSM_FREQ_TEMP		= 8,
+	NVME_RW_DSM_LATENCY_NONE	= 0 << 4,
+	NVME_RW_DSM_LATENCY_IDLE	= 1 << 4,
+	NVME_RW_DSM_LATENCY_NORM	= 2 << 4,
+	NVME_RW_DSM_LATENCY_LOW		= 3 << 4,
+	NVME_RW_DSM_SEQ_REQ		= 1 << 6,
+	NVME_RW_DSM_COMPRESSED		= 1 << 7,
+};
+
+/* Admin commands */
+
+enum nvme_admin_opcode {
+	nvme_admin_delete_sq		= 0x00,
+	nvme_admin_create_sq		= 0x01,
+	nvme_admin_get_features		= 0x02,
+	nvme_admin_delete_cq		= 0x04,
+	nvme_admin_create_cq		= 0x05,
+	nvme_admin_identify		= 0x06,
+	nvme_admin_abort_cmd		= 0x08,
+	nvme_admin_set_features		= 0x09,
+	nvme_admin_get_log_page		= 0x0a,
+	nvme_admin_async_event		= 0x0c,
+	nvme_admin_download_fw		= 0x0d,
+	nvme_admin_security_recv	= 0x0e,
+	nvme_admin_format_nvm		= 0x10,
+	nvme_admin_security_send	= 0x11,
+	nvme_admin_activate_fw		= 0x14,
+};
+
+enum {
+	NVME_QUEUE_PHYS_CONTIG	= (1 << 0),
+	NVME_CQ_IRQ_ENABLED	= (1 << 1),
+	NVME_SQ_PRIO_URGENT	= (0 << 1),
+	NVME_SQ_PRIO_HIGH	= (1 << 1),
+	NVME_SQ_PRIO_MEDIUM	= (2 << 1),
+	NVME_SQ_PRIO_LOW	= (3 << 1),
+	NVME_FEAT_ARBITRATION	= 0x01,
+	NVME_FEAT_POWER_MGMT	= 0x02,
+	NVME_FEAT_LBA_RANGE	= 0x03,
+	NVME_FEAT_TEMP_THRESH	= 0x04,
+	NVME_FEAT_ERR_RECOVERY	= 0x05,
+	NVME_FEAT_VOLATILE_WC	= 0x06,
+	NVME_FEAT_NUM_QUEUES	= 0x07,
+	NVME_FEAT_IRQ_COALESCE	= 0x08,
+	NVME_FEAT_IRQ_CONFIG	= 0x09,
+	NVME_FEAT_WRITE_ATOMIC	= 0x0a,
+	NVME_FEAT_ASYNC_EVENT	= 0x0b,
+	NVME_FEAT_SW_PROGRESS	= 0x0c,
+};
+
+struct nvme_identify {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2[2];
+	__le64			prp1;
+	__le64			prp2;
+	__le32			cns;
+	__u32			rsvd11[5];
+};
+
+struct nvme_features {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2[2];
+	__le64			prp1;
+	__le64			prp2;
+	__le32			fid;
+	__le32			dword11;
+	__u32			rsvd12[4];
+};
+
+struct nvme_create_cq {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			rsvd1[5];
+	__le64			prp1;
+	__u64			rsvd8;
+	__le16			cqid;
+	__le16			qsize;
+	__le16			cq_flags;
+	__le16			irq_vector;
+	__u32			rsvd12[4];
+};
+
+struct nvme_create_sq {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			rsvd1[5];
+	__le64			prp1;
+	__u64			rsvd8;
+	__le16			sqid;
+	__le16			qsize;
+	__le16			sq_flags;
+	__le16			cqid;
+	__le32			rsvd12[4];
+};
+
+struct nvme_delete_queue {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__u32			rsvd1[9];
+	__le16			qid;
+	__le16			rsvd10;
+	__le32			rsvd11[5];
+};
+
+struct nvme_common_command {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u32			rsvd2[14];
+};
+
+struct nvme_command {
+	union {
+		struct nvme_common_command common;
+		struct nvme_rw_command rw;
+		struct nvme_identify identify;
+		struct nvme_features features;
+		struct nvme_create_cq create_cq;
+		struct nvme_create_sq create_sq;
+		struct nvme_delete_queue delete_queue;
+	};
+};
+
+/* XXX: Sync with spec */
+enum {
+	NVME_SC_SUCCESS			= 0x0,
+	NVME_SC_INVALID_OPCODE		= 0x1,
+	NVME_SC_INVALID_FIELD		= 0x2,
+	NVME_SC_CMDID_CONFLICT		= 0x3,
+	NVME_SC_DATA_XFER_ERROR		= 0x4,
+	NVME_SC_POWER_LOSS		= 0x5,
+	NVME_SC_INTERNAL		= 0x6,
+	NVME_SC_ABORT_REQ		= 0x7,
+	NVME_SC_ABORT_QUEUE		= 0x8,
+	NVME_SC_FUSED_FAIL		= 0x9,
+	NVME_SC_FUSED_MISSING		= 0xa,
+	NVME_SC_LBA_RANGE		= 0x80,
+	NVME_SC_CAP_EXCEEDED		= 0x81,
+	NVME_SC_NS_NOT_READY		= 0x82,
+	NVME_SC_CQ_INVALID		= 0x100,
+	NVME_SC_QID_INVALID		= 0x101,
+	NVME_SC_QUEUE_SIZE		= 0x102,
+	NVME_SC_WRITE_FAULT		= 0x280,
+	NVME_SC_READ_ERROR		= 0x281,
+};
+
+struct nvme_completion {
+	__le32	result;		/* Used by admin commands to return data */
+	__le32	rsvd;
+	__le16	sq_head;	/* how much of this queue may be reclaimed */
+	__le16	sq_id;		/* submission queue that generated this entry */
+	__u16	command_id;	/* of the command which completed */
+	__le16	status;		/* did the command fail, and if so, why? */
+};
+
+#define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
+#define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
+#define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
+
+#endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From 7b4fe9b1cb4b9a6f4ae23a12ef96d08d96e2a5da Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Wed, 26 Jan 2011 10:01:21 -0500
Subject: NVMe: Make nvme_common_command more featureful

Add prp1, prp2 and the metadata prp to the common command, since the
fields are generally used this way.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 9ba53584f722..1c0b5ef08959 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -142,6 +142,18 @@ enum nvme_opcode {
 	nvme_cmd_dsm		= 0x09,
 };
 
+struct nvme_common_command {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2;
+	__le64			metadata;
+	__le64			prp1;
+	__le64			prp2;
+	__u32			rsvd10[6];
+};
+
 struct nvme_rw_command {
 	__u8			opcode;
 	__u8			flags;
@@ -284,14 +296,6 @@ struct nvme_delete_queue {
 	__le32			rsvd11[5];
 };
 
-struct nvme_common_command {
-	__u8			opcode;
-	__u8			flags;
-	__u16			command_id;
-	__le32			nsid;
-	__u32			rsvd2[14];
-};
-
 struct nvme_command {
 	union {
 		struct nvme_common_command common;
-- 
cgit v1.2.3


From a53295b6998f62d961c29e54051c1cf1d738c2b3 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Tue, 1 Feb 2011 16:13:29 -0500
Subject: NVMe: Add NVME_IOCTL_SUBMIT_IO

Allow userspace to submit synchronous I/O like the SCSI sg interface does.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/nvme.h | 18 ++++++++++++++++++
 2 files changed, 61 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index f44d6cd87ea2..40fb2e1bdfe4 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -780,6 +780,47 @@ static int nvme_get_range_type(struct nvme_ns *ns, unsigned long addr)
 	return nvme_submit_user_admin_command(ns->dev, addr, 4096, &c);
 }
 
+static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
+{
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_queue *nvmeq;
+	struct nvme_user_io io;
+	struct nvme_command c;
+	unsigned length;
+	u32 result;
+	int nents, status;
+	struct scatterlist *sg;
+
+	if (copy_from_user(&io, uio, sizeof(io)))
+		return -EFAULT;
+	length = io.nblocks << io.block_shift;
+	nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length, &sg);
+	if (nents < 0)
+		return nents;
+
+	memset(&c, 0, sizeof(c));
+	c.rw.opcode = io.opcode;
+	c.rw.flags = io.flags;
+	c.rw.nsid = cpu_to_le32(io.nsid);
+	c.rw.slba = cpu_to_le64(io.slba);
+	c.rw.length = cpu_to_le16(io.nblocks - 1);
+	c.rw.control = cpu_to_le16(io.control);
+	c.rw.dsmgmt = cpu_to_le16(io.dsmgmt);
+	c.rw.reftag = cpu_to_le32(io.reftag);	/* XXX: endian? */
+	c.rw.apptag = cpu_to_le16(io.apptag);
+	c.rw.appmask = cpu_to_le16(io.appmask);
+	/* XXX: metadata */
+	nvme_setup_prps(&c.common, sg, length);
+
+	nvmeq = get_nvmeq(ns);
+	status = nvme_submit_sync_cmd(nvmeq, &c, &result);
+	put_nvmeq(nvmeq);
+
+	nvme_unmap_user_pages(dev, io.opcode & 1, io.addr, length, sg, nents);
+	put_user(result, &uio->result);
+	return status;
+}
+
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 							unsigned long arg)
 {
@@ -792,6 +833,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 		return nvme_identify(ns, arg, 1);
 	case NVME_IOCTL_GET_RANGE_TYPE:
 		return nvme_get_range_type(ns, arg);
+	case NVME_IOCTL_SUBMIT_IO:
+		return nvme_submit_io(ns, (void __user *)arg);
 	default:
 		return -ENOTTY;
 	}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 1c0b5ef08959..0aaecb059d14 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -340,8 +340,26 @@ struct nvme_completion {
 	__le16	status;		/* did the command fail, and if so, why? */
 };
 
+struct nvme_user_io {
+	__u8	opcode;
+	__u8	flags;
+	__u16	control;
+	__u32	nsid;
+	__u64	metadata;
+	__u64	addr;
+	__u64	slba;
+	__u16	nblocks;
+	__u16	block_shift;
+	__u32	dsmgmt;
+	__u32	reftag;
+	__u16	apptag;
+	__u16	appmask;
+	__u32	result;
+};
+
 #define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
 #define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
 #define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
+#define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_rw_command)
 
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From 7a63e07b9a98b77dd075e06b93c1d8dc871ddad5 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Thu, 3 Feb 2011 09:20:57 -0500
Subject: NVMe: Add remaining status codes

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 0aaecb059d14..dbbdc126401b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -321,14 +321,29 @@ enum {
 	NVME_SC_ABORT_QUEUE		= 0x8,
 	NVME_SC_FUSED_FAIL		= 0x9,
 	NVME_SC_FUSED_MISSING		= 0xa,
+	NVME_SC_INVALID_NS		= 0xb,
 	NVME_SC_LBA_RANGE		= 0x80,
 	NVME_SC_CAP_EXCEEDED		= 0x81,
 	NVME_SC_NS_NOT_READY		= 0x82,
 	NVME_SC_CQ_INVALID		= 0x100,
 	NVME_SC_QID_INVALID		= 0x101,
 	NVME_SC_QUEUE_SIZE		= 0x102,
+	NVME_SC_ABORT_LIMIT		= 0x103,
+	NVME_SC_ABORT_MISSING		= 0x104,
+	NVME_SC_ASYNC_LIMIT		= 0x105,
+	NVME_SC_FIRMWARE_SLOT		= 0x106,
+	NVME_SC_FIRMWARE_IMAGE		= 0x107,
+	NVME_SC_INVALID_VECTOR		= 0x108,
+	NVME_SC_INVALID_LOG_PAGE	= 0x109,
+	NVME_SC_INVALID_FORMAT		= 0x10a,
+	NVME_SC_BAD_ATTRIBUTES		= 0x180,
 	NVME_SC_WRITE_FAULT		= 0x280,
 	NVME_SC_READ_ERROR		= 0x281,
+	NVME_SC_GUARD_CHECK		= 0x282,
+	NVME_SC_APPTAG_CHECK		= 0x283,
+	NVME_SC_REFTAG_CHECK		= 0x284,
+	NVME_SC_COMPARE_FAILED		= 0x285,
+	NVME_SC_ACCESS_DENIED		= 0x286,
 };
 
 struct nvme_completion {
-- 
cgit v1.2.3


From 6ee44cdced04a53dc4f27eb97067e6cd33784726 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Thu, 3 Feb 2011 10:58:26 -0500
Subject: NVMe: Add download / activate firmware ioctls

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nvme.h | 33 +++++++++++++++++++++++++++------
 2 files changed, 72 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 744db3877c42..7cdf7f69cdcd 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -829,6 +829,47 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	return status;
 }
 
+static int nvme_download_firmware(struct nvme_ns *ns,
+						struct nvme_dlfw __user *udlfw)
+{
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_dlfw dlfw;
+	struct nvme_command c;
+	int nents, status;
+	struct scatterlist *sg;
+
+	if (copy_from_user(&dlfw, udlfw, sizeof(dlfw)))
+		return -EFAULT;
+	if (dlfw.length >= (1 << 30))
+		return -EINVAL;
+
+	nents = nvme_map_user_pages(dev, 1, dlfw.addr, dlfw.length * 4, &sg);
+	if (nents < 0)
+		return nents;
+
+	memset(&c, 0, sizeof(c));
+	c.dlfw.opcode = nvme_admin_download_fw;
+	c.dlfw.numd = cpu_to_le32(dlfw.length);
+	c.dlfw.offset = cpu_to_le32(dlfw.offset);
+	nvme_setup_prps(&c.common, sg, dlfw.length * 4);
+
+	status = nvme_submit_admin_cmd(dev, &c, NULL);
+	nvme_unmap_user_pages(dev, 0, dlfw.addr, dlfw.length * 4, sg, nents);
+	return status;
+}
+
+static int nvme_activate_firmware(struct nvme_ns *ns, unsigned long arg)
+{
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = nvme_admin_activate_fw;
+	c.common.rsvd10[0] = cpu_to_le32(arg);
+
+	return nvme_submit_admin_cmd(dev, &c, NULL);
+}
+
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 							unsigned long arg)
 {
@@ -843,6 +884,10 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 		return nvme_get_range_type(ns, arg);
 	case NVME_IOCTL_SUBMIT_IO:
 		return nvme_submit_io(ns, (void __user *)arg);
+	case NVME_IOCTL_DOWNLOAD_FW:
+		return nvme_download_firmware(ns, (void __user *)arg);
+	case NVME_IOCTL_ACTIVATE_FW:
+		return nvme_activate_firmware(ns, arg);
 	default:
 		return -ENOTTY;
 	}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index dbbdc126401b..8eed0e432eef 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -262,7 +262,7 @@ struct nvme_create_cq {
 	__u8			opcode;
 	__u8			flags;
 	__u16			command_id;
-	__le32			rsvd1[5];
+	__u32			rsvd1[5];
 	__le64			prp1;
 	__u64			rsvd8;
 	__le16			cqid;
@@ -276,14 +276,14 @@ struct nvme_create_sq {
 	__u8			opcode;
 	__u8			flags;
 	__u16			command_id;
-	__le32			rsvd1[5];
+	__u32			rsvd1[5];
 	__le64			prp1;
 	__u64			rsvd8;
 	__le16			sqid;
 	__le16			qsize;
 	__le16			sq_flags;
 	__le16			cqid;
-	__le32			rsvd12[4];
+	__u32			rsvd12[4];
 };
 
 struct nvme_delete_queue {
@@ -292,8 +292,20 @@ struct nvme_delete_queue {
 	__u16			command_id;
 	__u32			rsvd1[9];
 	__le16			qid;
-	__le16			rsvd10;
-	__le32			rsvd11[5];
+	__u16			rsvd10;
+	__u32			rsvd11[5];
+};
+
+struct nvme_download_firmware {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__u32			rsvd1[5];
+	__le64			prp1;
+	__le64			prp2;
+	__le32			numd;
+	__le32			offset;
+	__u32			rsvd12[4];
 };
 
 struct nvme_command {
@@ -305,6 +317,7 @@ struct nvme_command {
 		struct nvme_create_cq create_cq;
 		struct nvme_create_sq create_sq;
 		struct nvme_delete_queue delete_queue;
+		struct nvme_download_firmware dlfw;
 	};
 };
 
@@ -348,7 +361,7 @@ enum {
 
 struct nvme_completion {
 	__le32	result;		/* Used by admin commands to return data */
-	__le32	rsvd;
+	__u32	rsvd;
 	__le16	sq_head;	/* how much of this queue may be reclaimed */
 	__le16	sq_id;		/* submission queue that generated this entry */
 	__u16	command_id;	/* of the command which completed */
@@ -372,9 +385,17 @@ struct nvme_user_io {
 	__u32	result;
 };
 
+struct nvme_dlfw {
+	__u64	addr;
+	__u32	length;	/* In dwords */
+	__u32	offset;	/* In dwords */
+};
+
 #define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
 #define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
 #define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
 #define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_rw_command)
+#define NVME_IOCTL_DOWNLOAD_FW	_IOR('N', 0x44, struct nvme_dlfw)
+#define NVME_IOCTL_ACTIVATE_FW	_IO('N', 0x45)
 
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From 897cfe1ce7db152fa6dde576f4213a6160bf6502 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Mon, 14 Feb 2011 12:20:15 -0500
Subject: NVMe: Update BAR structure to match the current spec

Add two reserved registers in the middle of the BAR to match the 1.0
spec plus ECN 0002.

Also rename IMC and ISC to INTMC and INTSC to conform with the spec.
We still don't need to use them :-)

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 8eed0e432eef..757faa71666e 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -24,10 +24,12 @@
 struct nvme_bar {
 	__u64			cap;	/* Controller Capabilities */
 	__u32			vs;	/* Version */
-	__u32			ims;	/* Interrupt Mask Set */
-	__u32			imc;	/* Interrupt Mask Clear */
+	__u32			intms;	/* Interrupt Mask Set */
+	__u32			intmc;	/* Interrupt Mask Clear */
 	__u32			cc;	/* Controller Configuration */
+	__u32			rsvd1;	/* Reserved */
 	__u32			csts;	/* Controller Status */
+	__u32			rsvd2;	/* Reserved */
 	__u32			aqa;	/* Admin Queue Attributes */
 	__u64			asq;	/* Admin SQ Base Address */
 	__u64			acq;	/* Admin CQ Base Address */
-- 
cgit v1.2.3


From 2ddc4f74d8adcf3e1cdec7f3e72d19b5c878597c Mon Sep 17 00:00:00 2001
From: Krzysztof Wierzbicki <krzysztof.wierzbicki@intel.com>
Date: Mon, 28 Feb 2011 08:27:13 +0100
Subject: NVMe: Update admin opcodes to match the 1.0RC spec

Signed-off-by: Krzysztof Wierzbicki <krzysztof.wierzbicki@intel.com>
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 757faa71666e..c46a9b7988fb 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -199,19 +199,19 @@ enum {
 enum nvme_admin_opcode {
 	nvme_admin_delete_sq		= 0x00,
 	nvme_admin_create_sq		= 0x01,
-	nvme_admin_get_features		= 0x02,
+	nvme_admin_get_log_page		= 0x02,
 	nvme_admin_delete_cq		= 0x04,
 	nvme_admin_create_cq		= 0x05,
 	nvme_admin_identify		= 0x06,
 	nvme_admin_abort_cmd		= 0x08,
 	nvme_admin_set_features		= 0x09,
-	nvme_admin_get_log_page		= 0x0a,
+	nvme_admin_get_features		= 0x0a,
 	nvme_admin_async_event		= 0x0c,
-	nvme_admin_download_fw		= 0x0d,
-	nvme_admin_security_recv	= 0x0e,
-	nvme_admin_format_nvm		= 0x10,
-	nvme_admin_security_send	= 0x11,
-	nvme_admin_activate_fw		= 0x14,
+	nvme_admin_activate_fw		= 0x10,
+	nvme_admin_download_fw		= 0x11,
+	nvme_admin_format_nvm		= 0x80,
+	nvme_admin_security_send	= 0x81,
+	nvme_admin_security_recv	= 0x82,
 };
 
 enum {
-- 
cgit v1.2.3


From 19e899b2f9f89f4a290dd5c9c24d15987a18ab21 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Wed, 16 Mar 2011 16:29:24 -0400
Subject: NVMe: Remove outdated comments

The head can never overrun the tail since we won't allocate enough command
IDs to let that happen.  The status codes are in sync with the spec.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 1 -
 include/linux/nvme.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 740a9c1b81aa..d4f95eb51dc1 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -245,7 +245,6 @@ static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
 {
 	unsigned long flags;
 	u16 tail;
-	/* XXX: Need to check tail isn't going to overrun head */
 	spin_lock_irqsave(&nvmeq->q_lock, flags);
 	tail = nvmeq->sq_tail;
 	memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index c46a9b7988fb..6b5a8d19daf5 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -323,7 +323,6 @@ struct nvme_command {
 	};
 };
 
-/* XXX: Sync with spec */
 enum {
 	NVME_SC_SUCCESS			= 0x0,
 	NVME_SC_INVALID_OPCODE		= 0x1,
-- 
cgit v1.2.3


From 9d4af1b7796ba02b73a79a8694399e5a3cd1c55d Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Sun, 20 Mar 2011 07:27:10 -0400
Subject: NVMe: Correct the definitions of two ioctls

NVME_IOCTL_SUBMIT_IO has a struct nvme_user_io, not a struct nvme_rw_command
as a parameter, and NVME_IOCTL_DOWNLOAD_FW is a Write, not a Read.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 6b5a8d19daf5..fd10d597cca7 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -395,8 +395,8 @@ struct nvme_dlfw {
 #define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
 #define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
 #define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
-#define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_rw_command)
-#define NVME_IOCTL_DOWNLOAD_FW	_IOR('N', 0x44, struct nvme_dlfw)
+#define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_user_io)
+#define NVME_IOCTL_DOWNLOAD_FW	_IOW('N', 0x44, struct nvme_dlfw)
 #define NVME_IOCTL_ACTIVATE_FW	_IO('N', 0x45)
 
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From 6c7d49455ceb63064f992347d9185ff5bf43497a Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Mon, 21 Mar 2011 09:48:57 -0400
Subject: NVMe: Change the definition of nvme_user_io

The read and write commands don't define a 'result', so there's no need
to copy it back to userspace.

Remove the ability of the ioctl to submit commands to a different
namespace; it's just asking for trouble, and the use case I have in mind
will be addressed througha  different ioctl in the future.  That removes
the need for both the block_shift and nsid arguments.

Check that the opcode is one of 'read' or 'write'.  Future opcodes may
be added in the future, but we will need a different structure definition
for them.

The nblocks field is redefined to be 0-based.  This allows the user to
request the full 65536 blocks.

Don't byteswap the reftag, apptag and appmask.  Martin Petersen tells
me these are calculated in big-endian and are transmitted to the device
in big-endian.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 27 +++++++++++++++++----------
 include/linux/nvme.h |  8 +++-----
 2 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index d0b52622e261..90a96ec8a596 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -1035,29 +1035,37 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	struct nvme_user_io io;
 	struct nvme_command c;
 	unsigned length;
-	u32 result;
 	int nents, status;
 	struct scatterlist *sg;
 	struct nvme_prps *prps;
 
 	if (copy_from_user(&io, uio, sizeof(io)))
 		return -EFAULT;
-	length = io.nblocks << io.block_shift;
-	nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length, &sg);
+	length = (io.nblocks + 1) << ns->lba_shift;
+
+	switch (io.opcode) {
+	case nvme_cmd_write:
+	case nvme_cmd_read:
+		nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr,
+								length, &sg);
+	default:
+		return -EFAULT;
+	}
+
 	if (nents < 0)
 		return nents;
 
 	memset(&c, 0, sizeof(c));
 	c.rw.opcode = io.opcode;
 	c.rw.flags = io.flags;
-	c.rw.nsid = cpu_to_le32(io.nsid);
+	c.rw.nsid = cpu_to_le32(ns->ns_id);
 	c.rw.slba = cpu_to_le64(io.slba);
-	c.rw.length = cpu_to_le16(io.nblocks - 1);
+	c.rw.length = cpu_to_le16(io.nblocks);
 	c.rw.control = cpu_to_le16(io.control);
 	c.rw.dsmgmt = cpu_to_le16(io.dsmgmt);
-	c.rw.reftag = cpu_to_le32(io.reftag);	/* XXX: endian? */
-	c.rw.apptag = cpu_to_le16(io.apptag);
-	c.rw.appmask = cpu_to_le16(io.appmask);
+	c.rw.reftag = io.reftag;
+	c.rw.apptag = io.apptag;
+	c.rw.appmask = io.appmask;
 	/* XXX: metadata */
 	prps = nvme_setup_prps(dev, &c.common, sg, length);
 
@@ -1069,11 +1077,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	 * additional races since q_lock already protects against other CPUs.
 	 */
 	put_nvmeq(nvmeq);
-	status = nvme_submit_sync_cmd(nvmeq, &c, &result, IO_TIMEOUT);
+	status = nvme_submit_sync_cmd(nvmeq, &c, NULL, IO_TIMEOUT);
 
 	nvme_unmap_user_pages(dev, io.opcode & 1, io.addr, length, sg, nents);
 	nvme_free_prps(dev, prps);
-	put_user(result, &uio->result);
 	return status;
 }
 
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index fd10d597cca7..347ad5f9a721 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -373,17 +373,15 @@ struct nvme_user_io {
 	__u8	opcode;
 	__u8	flags;
 	__u16	control;
-	__u32	nsid;
+	__u16	nblocks;
+	__u16	rsvd;
 	__u64	metadata;
 	__u64	addr;
 	__u64	slba;
-	__u16	nblocks;
-	__u16	block_shift;
 	__u32	dsmgmt;
 	__u32	reftag;
 	__u16	apptag;
 	__u16	appmask;
-	__u32	result;
 };
 
 struct nvme_dlfw {
@@ -395,7 +393,7 @@ struct nvme_dlfw {
 #define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
 #define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
 #define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
-#define NVME_IOCTL_SUBMIT_IO	_IOWR('N', 0x43, struct nvme_user_io)
+#define NVME_IOCTL_SUBMIT_IO	_IOW('N', 0x43, struct nvme_user_io)
 #define NVME_IOCTL_DOWNLOAD_FW	_IOW('N', 0x44, struct nvme_dlfw)
 #define NVME_IOCTL_ACTIVATE_FW	_IO('N', 0x45)
 
-- 
cgit v1.2.3


From 7f53f9d2424533256ae86f7df5661a17de743de8 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Tue, 22 Mar 2011 15:55:45 -0400
Subject: NVMe: Correct the Controller Configuration settings

The arbitration field was extended by one bit, shifting the shutdown
notification bits by one.  Also, the SQ/CQ entry size was made
configurable for future extensions.

Reported-by: Paul Luse <paul.e.luse@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c |  1 +
 include/linux/nvme.h | 10 ++++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index d3eeca5a3c4c..014a7f6e39bc 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -905,6 +905,7 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 	dev->ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM;
 	dev->ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
 	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
 
 	writel(0, &dev->bar->cc);
 	writel(aqa, &dev->bar->aqa);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 347ad5f9a721..9d6febb91521 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -41,10 +41,12 @@ enum {
 	NVME_CC_MPS_SHIFT	= 7,
 	NVME_CC_ARB_RR		= 0 << 11,
 	NVME_CC_ARB_WRRU	= 1 << 11,
-	NVME_CC_ARB_VS		= 3 << 11,
-	NVME_CC_SHN_NONE	= 0 << 13,
-	NVME_CC_SHN_NORMAL	= 1 << 13,
-	NVME_CC_SHN_ABRUPT	= 2 << 13,
+	NVME_CC_ARB_VS		= 7 << 11,
+	NVME_CC_SHN_NONE	= 0 << 14,
+	NVME_CC_SHN_NORMAL	= 1 << 14,
+	NVME_CC_SHN_ABRUPT	= 2 << 14,
+	NVME_CC_IOSQES		= 6 << 16,
+	NVME_CC_IOCQES		= 4 << 20,
 	NVME_CSTS_RDY		= 1 << 0,
 	NVME_CSTS_CFS		= 1 << 1,
 	NVME_CSTS_SHST_NORMAL	= 0 << 2,
-- 
cgit v1.2.3


From 22605f96810d073eb74051d0295b6577d6a6a563 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Tue, 19 Apr 2011 15:04:20 -0400
Subject: NVMe: Time out initialisation after a few seconds

THe device reports (in its capability register) how long it will take
to initialise.  If that time elapses before the ready bit becomes set,
conclude the device is broken and refuse to initialise it.  Log a nice
error message so the user knows why we did nothing.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 10 ++++++++++
 include/linux/nvme.h |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index bcc780ac4ec0..57f2b33a47dd 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -893,6 +893,8 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
 	u32 aqa;
+	u64 cap;
+	unsigned long timeout;
 	struct nvme_queue *nvmeq;
 
 	dev->dbs = ((void __iomem *)dev->bar) + 4096;
@@ -915,10 +917,18 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 	writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
 	writel(dev->ctrl_config, &dev->bar->cc);
 
+	cap = readq(&dev->bar->cap);
+	timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+
 	while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
 		msleep(100);
 		if (fatal_signal_pending(current))
 			return -EINTR;
+		if (time_after(jiffies, timeout)) {
+			dev_err(&dev->pci_dev->dev,
+				"Device not ready; aborting initialisation\n");
+			return -ENODEV;
+		}
 	}
 
 	result = queue_request_irq(dev, nvmeq, "nvme admin");
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 9d6febb91521..a19304fefa7d 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -35,6 +35,8 @@ struct nvme_bar {
 	__u64			acq;	/* Admin CQ Base Address */
 };
 
+#define NVME_CAP_TIMEOUT(cap)	(((cap) >> 24) & 0xff)
+
 enum {
 	NVME_CC_ENABLE		= 1 << 0,
 	NVME_CC_CSS_NVM		= 0 << 4,
-- 
cgit v1.2.3


From 6bbf1acddeed0bfb345a5578f9fcada16f1e514f Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Fri, 20 May 2011 13:03:42 -0400
Subject: NVMe: Rework ioctls

Remove the special-purpose IDENTIFY, GET_RANGE_TYPE, DOWNLOAD_FIRMWARE
and ACTIVATE_FIRMWARE commands.  Replace them with a generic ADMIN_CMD
ioctl that can submit any admin command.

Add a new ID ioctl that returns the namespace ID of the queried device.
It corresponds to the SCSI Idlun ioctl.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 128 ++++++++++++++++-----------------------------------
 include/linux/nvme.h |  34 +++++++++-----
 2 files changed, 63 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index f5e51a6116e3..9e3c724b95c3 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -1033,51 +1033,6 @@ static void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
 		put_page(sg_page(&sg[i]));
 }
 
-static int nvme_submit_user_admin_command(struct nvme_dev *dev,
-					unsigned long addr, unsigned length,
-					struct nvme_command *cmd)
-{
-	int err, nents, tmplen = length;
-	struct scatterlist *sg;
-	struct nvme_prps *prps;
-
-	nents = nvme_map_user_pages(dev, 0, addr, length, &sg);
-	if (nents < 0)
-		return nents;
-	prps = nvme_setup_prps(dev, &cmd->common, sg, &tmplen, GFP_KERNEL);
-	if (tmplen != length)
-		err = -ENOMEM;
-	else
-		err = nvme_submit_admin_cmd(dev, cmd, NULL);
-	nvme_unmap_user_pages(dev, 0, addr, length, sg, nents);
-	nvme_free_prps(dev, prps);
-	return err ? -EIO : 0;
-}
-
-static int nvme_identify(struct nvme_ns *ns, unsigned long addr, int cns)
-{
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.identify.opcode = nvme_admin_identify;
-	c.identify.nsid = cns ? 0 : cpu_to_le32(ns->ns_id);
-	c.identify.cns = cpu_to_le32(cns);
-
-	return nvme_submit_user_admin_command(ns->dev, addr, 4096, &c);
-}
-
-static int nvme_get_range_type(struct nvme_ns *ns, unsigned long addr)
-{
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.features.opcode = nvme_admin_get_features;
-	c.features.nsid = cpu_to_le32(ns->ns_id);
-	c.features.fid = cpu_to_le32(NVME_FEAT_LBA_RANGE);
-
-	return nvme_submit_user_admin_command(ns->dev, addr, 4096, &c);
-}
-
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
 	struct nvme_dev *dev = ns->dev;
@@ -1096,10 +1051,11 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	switch (io.opcode) {
 	case nvme_cmd_write:
 	case nvme_cmd_read:
+	case nvme_cmd_compare:
 		nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr,
 								length, &sg);
 	default:
-		return -EFAULT;
+		return -EINVAL;
 	}
 
 	if (nents < 0)
@@ -1137,70 +1093,66 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	return status;
 }
 
-static int nvme_download_firmware(struct nvme_ns *ns,
-						struct nvme_dlfw __user *udlfw)
+static int nvme_user_admin_cmd(struct nvme_ns *ns,
+					struct nvme_admin_cmd __user *ucmd)
 {
 	struct nvme_dev *dev = ns->dev;
-	struct nvme_dlfw dlfw;
+	struct nvme_admin_cmd cmd;
 	struct nvme_command c;
-	int nents, status, length;
+	int status, length, nents = 0;
 	struct scatterlist *sg;
-	struct nvme_prps *prps;
+	struct nvme_prps *prps = NULL;
 
-	if (copy_from_user(&dlfw, udlfw, sizeof(dlfw)))
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
 		return -EFAULT;
-	if (dlfw.length >= (1 << 30))
-		return -EINVAL;
-	length = dlfw.length * 4;
-
-	nents = nvme_map_user_pages(dev, 1, dlfw.addr, length, &sg);
-	if (nents < 0)
-		return nents;
 
 	memset(&c, 0, sizeof(c));
-	c.dlfw.opcode = nvme_admin_download_fw;
-	c.dlfw.numd = cpu_to_le32(dlfw.length);
-	c.dlfw.offset = cpu_to_le32(dlfw.offset);
-	prps = nvme_setup_prps(dev, &c.common, sg, &length, GFP_KERNEL);
-	if (length != dlfw.length * 4)
+	c.common.opcode = cmd.opcode;
+	c.common.flags = cmd.flags;
+	c.common.nsid = cpu_to_le32(cmd.nsid);
+	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
+	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
+	c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
+	c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
+	c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
+	c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
+	c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
+	c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
+
+	length = cmd.data_len;
+	if (cmd.data_len) {
+		nents = nvme_map_user_pages(dev, 1, cmd.addr, length, &sg);
+		if (nents < 0)
+			return nents;
+		prps = nvme_setup_prps(dev, &c.common, sg, &length, GFP_KERNEL);
+	}
+
+	if (length != cmd.data_len)
 		status = -ENOMEM;
 	else
 		status = nvme_submit_admin_cmd(dev, &c, NULL);
-	nvme_unmap_user_pages(dev, 0, dlfw.addr, dlfw.length * 4, sg, nents);
-	nvme_free_prps(dev, prps);
+	if (cmd.data_len) {
+		nvme_unmap_user_pages(dev, 0, cmd.addr, cmd.data_len, sg,
+									nents);
+		nvme_free_prps(dev, prps);
+	}
 	return status;
 }
 
-static int nvme_activate_firmware(struct nvme_ns *ns, unsigned long arg)
-{
-	struct nvme_dev *dev = ns->dev;
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_admin_activate_fw;
-	c.common.rsvd10[0] = cpu_to_le32(arg);
-
-	return nvme_submit_admin_cmd(dev, &c, NULL);
-}
-
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 							unsigned long arg)
 {
 	struct nvme_ns *ns = bdev->bd_disk->private_data;
 
 	switch (cmd) {
-	case NVME_IOCTL_IDENTIFY_NS:
-		return nvme_identify(ns, arg, 0);
-	case NVME_IOCTL_IDENTIFY_CTRL:
-		return nvme_identify(ns, arg, 1);
-	case NVME_IOCTL_GET_RANGE_TYPE:
-		return nvme_get_range_type(ns, arg);
+	case NVME_IOCTL_ID:
+		return ns->ns_id;
+	case NVME_IOCTL_ADMIN_CMD:
+		return nvme_user_admin_cmd(ns, (void __user *)arg);
 	case NVME_IOCTL_SUBMIT_IO:
 		return nvme_submit_io(ns, (void __user *)arg);
-	case NVME_IOCTL_DOWNLOAD_FW:
-		return nvme_download_firmware(ns, (void __user *)arg);
-	case NVME_IOCTL_ACTIVATE_FW:
-		return nvme_activate_firmware(ns, arg);
 	default:
 		return -ENOTTY;
 	}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index a19304fefa7d..c96ab0f5ef6f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -153,11 +153,11 @@ struct nvme_common_command {
 	__u8			flags;
 	__u16			command_id;
 	__le32			nsid;
-	__u64			rsvd2;
+	__u32			cdw2[2];
 	__le64			metadata;
 	__le64			prp1;
 	__le64			prp2;
-	__u32			rsvd10[6];
+	__u32			cdw10[6];
 };
 
 struct nvme_rw_command {
@@ -388,17 +388,29 @@ struct nvme_user_io {
 	__u16	appmask;
 };
 
-struct nvme_dlfw {
+struct nvme_admin_cmd {
+	__u8	opcode;
+	__u8	flags;
+	__u16	rsvd1;
+	__u32	nsid;
+	__u32	cdw2;
+	__u32	cdw3;
+	__u64	metadata;
 	__u64	addr;
-	__u32	length;	/* In dwords */
-	__u32	offset;	/* In dwords */
+	__u32	metadata_len;
+	__u32	data_len;
+	__u32	cdw10;
+	__u32	cdw11;
+	__u32	cdw12;
+	__u32	cdw13;
+	__u32	cdw14;
+	__u32	cdw15;
+	__u32	timeout_ms;
+	__u32	result;
 };
 
-#define NVME_IOCTL_IDENTIFY_NS	_IOW('N', 0x40, struct nvme_id_ns)
-#define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
-#define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
-#define NVME_IOCTL_SUBMIT_IO	_IOW('N', 0x43, struct nvme_user_io)
-#define NVME_IOCTL_DOWNLOAD_FW	_IOW('N', 0x44, struct nvme_dlfw)
-#define NVME_IOCTL_ACTIVATE_FW	_IO('N', 0x45)
+#define NVME_IOCTL_ID		_IO('N', 0x40)
+#define NVME_IOCTL_ADMIN_CMD	_IOWR('N', 0x41, struct nvme_admin_cmd)
+#define NVME_IOCTL_SUBMIT_IO	_IOW('N', 0x42, struct nvme_user_io)
 
 #endif /* _LINUX_NVME_H */
-- 
cgit v1.2.3


From f1938f6e1ee1583c87ec74dc406fdd8694e99ac8 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Thu, 20 Oct 2011 17:00:41 -0400
Subject: NVMe: Implement doorbell stride capability

The doorbell stride allows devices to spread out their doorbells instead
of packing them tightly.  This feature was added as part of ECN 003.

This patch also enables support for more than 512 queues :-)

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme.c | 17 ++++++++++++++---
 include/linux/nvme.h |  1 +
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index cfe5932821d8..a17f80fa3881 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -70,6 +70,7 @@ struct nvme_dev {
 	struct dma_pool *prp_small_pool;
 	int instance;
 	int queue_count;
+	int db_stride;
 	u32 ctrl_config;
 	struct msix_entry *entry;
 	struct nvme_bar __iomem *bar;
@@ -672,7 +673,7 @@ static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
 	if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
 		return IRQ_NONE;
 
-	writel(head, nvmeq->q_db + 1);
+	writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride));
 	nvmeq->cq_head = head;
 	nvmeq->cq_phase = phase;
 
@@ -889,7 +890,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	init_waitqueue_head(&nvmeq->sq_full);
 	init_waitqueue_entry(&nvmeq->sq_cong_wait, nvme_thread);
 	bio_list_init(&nvmeq->sq_cong);
-	nvmeq->q_db = &dev->dbs[qid * 2];
+	nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
 	nvmeq->q_depth = depth;
 	nvmeq->cq_vector = vector;
 
@@ -981,6 +982,7 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 
 	cap = readq(&dev->bar->cap);
 	timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+	dev->db_stride = NVME_CAP_STRIDE(cap);
 
 	while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
 		msleep(100);
@@ -1357,7 +1359,7 @@ static int set_queue_count(struct nvme_dev *dev, int count)
 
 static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
 {
-	int result, cpu, i, nr_io_queues;
+	int result, cpu, i, nr_io_queues, db_bar_size;
 
 	nr_io_queues = num_online_cpus();
 	result = set_queue_count(dev, nr_io_queues);
@@ -1369,6 +1371,15 @@ static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
 	/* Deregister the admin queue's interrupt */
 	free_irq(dev->entry[0].vector, dev->queues[0]);
 
+	db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
+	if (db_bar_size > 8192) {
+		iounmap(dev->bar);
+		dev->bar = ioremap(pci_resource_start(dev->pci_dev, 0),
+								db_bar_size);
+		dev->dbs = ((void __iomem *)dev->bar) + 4096;
+		dev->queues[0]->q_db = dev->dbs;
+	}
+
 	for (i = 0; i < nr_io_queues; i++)
 		dev->entry[i].entry = i;
 	for (;;) {
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index c96ab0f5ef6f..2a2c535c8345 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -36,6 +36,7 @@ struct nvme_bar {
 };
 
 #define NVME_CAP_TIMEOUT(cap)	(((cap) >> 24) & 0xff)
+#define NVME_CAP_STRIDE(cap)	(((cap) >> 32) & 0xf)
 
 enum {
 	NVME_CC_ENABLE		= 1 << 0,
-- 
cgit v1.2.3


From 010e646ba2fdfc558048a97da746381c35836280 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Fri, 4 Nov 2011 16:24:23 -0400
Subject: NVMe: Update Identify Controller data structure

The driver was still using an old definition of Identify Controller
which only came to light once we started using the 'number of namespaces'
field properly.

Reported-by: Nisheeth Bhat <nisheeth.bhat@intel.com>
Reported-by: Khosrow Panah <Khosrow.Panah@idt.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/linux/nvme.h | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2a2c535c8345..9490a00529f4 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -57,6 +57,18 @@ enum {
 	NVME_CSTS_SHST_CMPLT	= 2 << 2,
 };
 
+struct nvme_id_power_state {
+	__le16			max_power;	/* centiwatts */
+	__u16			rsvd2;
+	__le32			entry_lat;	/* microseconds */
+	__le32			exit_lat;	/* microseconds */
+	__u8			read_tput;
+	__u8			read_lat;
+	__u8			write_tput;
+	__u8			write_lat;
+	__u8			rsvd16[16];
+};
+
 #define NVME_VS(major, minor)	(major << 16 | minor)
 
 struct nvme_id_ctrl {
@@ -65,9 +77,11 @@ struct nvme_id_ctrl {
 	char			sn[20];
 	char			mn[40];
 	char			fr[8];
-	__le32			nn;
 	__u8			rab;
-	__u8			rsvd77[178];
+	__u8			ieee[3];
+	__u8			mic;
+	__u8			mdts;
+	__u8			rsvd78[178];
 	__le16			oacs;
 	__u8			acl;
 	__u8			aerl;
@@ -76,15 +90,18 @@ struct nvme_id_ctrl {
 	__u8			elpe;
 	__u8			npss;
 	__u8			rsvd264[248];
-	__le64			psd[32];
+	__u8			sqes;
+	__u8			cqes;
+	__u8			rsvd514[2];
+	__le32			nn;
 	__le16			oncs;
 	__le16			fuses;
 	__u8			fna;
 	__u8			vwc;
 	__le16			awun;
 	__le16			awupf;
-	__u8			rsvd778[246];
-	__u8			cmdset[2048];
+	__u8			rsvd530[1518];
+	struct nvme_id_power_state	psd[32];
 	__u8			vs[1024];
 };
 
-- 
cgit v1.2.3


From b14dab792dee3245b628e046d80a7fad5573fea6 Mon Sep 17 00:00:00 2001
From: Jassi Brar <jaswinder.singh@linaro.org>
Date: Thu, 13 Oct 2011 12:33:30 +0530
Subject: DMAEngine: Define interleaved transfer request api

Define a new api that could be used for doing fancy data transfers
like interleaved to contiguous copy and vice-versa.
Traditional SG_list based transfers tend to be very inefficient in
such cases as where the interleave and chunk are only a few bytes,
which call for a very condensed api to convey pattern of the transfer.
This api supports all 4 variants of scatter-gather and contiguous transfer.

Of course, neither can this api help transfers that don't lend to DMA by
nature, i.e, scattered tiny read/writes with no periodic pattern.

Also since now we support SLAVE channels that might not provide
device_prep_slave_sg callback but device_prep_interleaved_dma,
remove the BUG_ON check.

Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
Acked-by: Barry Song <Baohua.Song@csr.com>
[renamed dmaxfer_template to dma_interleaved_template
 did fixup after the enum dma_transfer_merge]
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 Documentation/dmaengine.txt |  8 +++++
 drivers/dma/dmaengine.c     |  4 +--
 include/linux/dmaengine.h   | 78 +++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 85 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt
index 94b7e0f96b38..bbe6cb3d1856 100644
--- a/Documentation/dmaengine.txt
+++ b/Documentation/dmaengine.txt
@@ -75,6 +75,10 @@ The slave DMA usage consists of following steps:
    slave_sg	- DMA a list of scatter gather buffers from/to a peripheral
    dma_cyclic	- Perform a cyclic DMA operation from/to a peripheral till the
 		  operation is explicitly stopped.
+   interleaved_dma - This is common to Slave as well as M2M clients. For slave
+		 address of devices' fifo could be already known to the driver.
+		 Various types of operations could be expressed by setting
+		 appropriate values to the 'dma_interleaved_template' members.
 
    A non-NULL return of this transfer API represents a "descriptor" for
    the given transaction.
@@ -89,6 +93,10 @@ The slave DMA usage consists of following steps:
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_data_direction direction);
 
+	struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
+		struct dma_chan *chan, struct dma_interleaved_template *xt,
+		unsigned long flags);
+
    The peripheral driver is expected to have mapped the scatterlist for
    the DMA operation prior to calling device_prep_slave_sg, and must
    keep the scatterlist mapped until the DMA operation has completed.
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index b48967b499da..a6c6051ec858 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -693,12 +693,12 @@ int dma_async_device_register(struct dma_device *device)
 		!device->device_prep_dma_interrupt);
 	BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) &&
 		!device->device_prep_dma_sg);
-	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
-		!device->device_prep_slave_sg);
 	BUG_ON(dma_has_cap(DMA_CYCLIC, device->cap_mask) &&
 		!device->device_prep_dma_cyclic);
 	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
 		!device->device_control);
+	BUG_ON(dma_has_cap(DMA_INTERLEAVE, device->cap_mask) &&
+		!device->device_prep_interleaved_dma);
 
 	BUG_ON(!device->device_alloc_chan_resources);
 	BUG_ON(!device->device_free_chan_resources);
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index a865b3a354cd..5532bb8b500c 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -71,10 +71,10 @@ enum dma_transaction_type {
 	DMA_ASYNC_TX,
 	DMA_SLAVE,
 	DMA_CYCLIC,
-};
-
+	DMA_INTERLEAVE,
 /* last transaction type for creation of the capabilities mask */
-#define DMA_TX_TYPE_END (DMA_CYCLIC + 1)
+	DMA_TX_TYPE_END,
+};
 
 /**
  * enum dma_transfer_direction - dma transfer mode and direction indicator
@@ -90,6 +90,74 @@ enum dma_transfer_direction {
 	DMA_DEV_TO_DEV,
 };
 
+/**
+ * Interleaved Transfer Request
+ * ----------------------------
+ * A chunk is collection of contiguous bytes to be transfered.
+ * The gap(in bytes) between two chunks is called inter-chunk-gap(ICG).
+ * ICGs may or maynot change between chunks.
+ * A FRAME is the smallest series of contiguous {chunk,icg} pairs,
+ *  that when repeated an integral number of times, specifies the transfer.
+ * A transfer template is specification of a Frame, the number of times
+ *  it is to be repeated and other per-transfer attributes.
+ *
+ * Practically, a client driver would have ready a template for each
+ *  type of transfer it is going to need during its lifetime and
+ *  set only 'src_start' and 'dst_start' before submitting the requests.
+ *
+ *
+ *  |      Frame-1        |       Frame-2       | ~ |       Frame-'numf'  |
+ *  |====....==.===...=...|====....==.===...=...| ~ |====....==.===...=...|
+ *
+ *    ==  Chunk size
+ *    ... ICG
+ */
+
+/**
+ * struct data_chunk - Element of scatter-gather list that makes a frame.
+ * @size: Number of bytes to read from source.
+ *	  size_dst := fn(op, size_src), so doesn't mean much for destination.
+ * @icg: Number of bytes to jump after last src/dst address of this
+ *	 chunk and before first src/dst address for next chunk.
+ *	 Ignored for dst(assumed 0), if dst_inc is true and dst_sgl is false.
+ *	 Ignored for src(assumed 0), if src_inc is true and src_sgl is false.
+ */
+struct data_chunk {
+	size_t size;
+	size_t icg;
+};
+
+/**
+ * struct dma_interleaved_template - Template to convey DMAC the transfer pattern
+ *	 and attributes.
+ * @src_start: Bus address of source for the first chunk.
+ * @dst_start: Bus address of destination for the first chunk.
+ * @dir: Specifies the type of Source and Destination.
+ * @src_inc: If the source address increments after reading from it.
+ * @dst_inc: If the destination address increments after writing to it.
+ * @src_sgl: If the 'icg' of sgl[] applies to Source (scattered read).
+ *		Otherwise, source is read contiguously (icg ignored).
+ *		Ignored if src_inc is false.
+ * @dst_sgl: If the 'icg' of sgl[] applies to Destination (scattered write).
+ *		Otherwise, destination is filled contiguously (icg ignored).
+ *		Ignored if dst_inc is false.
+ * @numf: Number of frames in this template.
+ * @frame_size: Number of chunks in a frame i.e, size of sgl[].
+ * @sgl: Array of {chunk,icg} pairs that make up a frame.
+ */
+struct dma_interleaved_template {
+	dma_addr_t src_start;
+	dma_addr_t dst_start;
+	enum dma_transfer_direction dir;
+	bool src_inc;
+	bool dst_inc;
+	bool src_sgl;
+	bool dst_sgl;
+	size_t numf;
+	size_t frame_size;
+	struct data_chunk sgl[0];
+};
+
 /**
  * enum dma_ctrl_flags - DMA flags to augment operation preparation,
  *  control completion, and communicate status.
@@ -445,6 +513,7 @@ struct dma_tx_state {
  * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio.
  *	The function takes a buffer of size buf_len. The callback function will
  *	be called after period_len bytes have been transferred.
+ * @device_prep_interleaved_dma: Transfer expression in a generic way.
  * @device_control: manipulate all pending operations on a channel, returns
  *	zero or error code
  * @device_tx_status: poll for transaction completion, the optional
@@ -509,6 +578,9 @@ struct dma_device {
 	struct dma_async_tx_descriptor *(*device_prep_dma_cyclic)(
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction);
+	struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
+		struct dma_chan *chan, struct dma_interleaved_template *xt,
+		unsigned long flags);
 	int (*device_control)(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 		unsigned long arg);
 
-- 
cgit v1.2.3


From ca21a146a45a179a2a7bc86d938a2fbf571a7510 Mon Sep 17 00:00:00 2001
From: Rongjun Ying <Rongjun.Ying@csr.com>
Date: Thu, 27 Oct 2011 19:22:39 -0700
Subject: dmaengine: add CSR SiRFprimaII DMAC driver

Cc: Jassi Brar <jaswinder.singh@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Rongjun Ying <rongjun.ying@csr.com>
Signed-off-by: Barry Song <Baohua.Song@csr.com>
[fixed direction enums and cyclic api based on changes
 already merged]
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 MAINTAINERS                 |   1 +
 drivers/dma/Kconfig         |   7 +
 drivers/dma/Makefile        |   1 +
 drivers/dma/sirf-dma.c      | 717 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sirfsoc_dma.h |   6 +
 5 files changed, 732 insertions(+)
 create mode 100644 drivers/dma/sirf-dma.c
 create mode 100644 include/linux/sirfsoc_dma.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 4808256446f2..1b141d71ea13 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -749,6 +749,7 @@ M:	Barry Song <baohua.song@csr.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-prima2/
+F:	drivers/dma/sirf-dma*
 
 ARM/EBSA110 MACHINE SUPPORT
 M:	Russell King <linux@arm.linux.org.uk>
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 7ec0d6cef0c3..f1a274994bb1 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -187,6 +187,13 @@ config TIMB_DMA
 	help
 	  Enable support for the Timberdale FPGA DMA engine.
 
+config SIRF_DMA
+	tristate "CSR SiRFprimaII DMA support"
+	depends on ARCH_PRIMA2
+	select DMA_ENGINE
+	help
+	  Enable support for the CSR SiRFprimaII DMA engine.
+
 config ARCH_HAS_ASYNC_TX_FIND_CHANNEL
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 30cf3b1f0c5c..009a222e8283 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
 obj-$(CONFIG_IMX_DMA) += imx-dma.o
 obj-$(CONFIG_MXS_DMA) += mxs-dma.o
 obj-$(CONFIG_TIMB_DMA) += timb_dma.o
+obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_PL330_DMA) += pl330.o
 obj-$(CONFIG_PCH_DMA) += pch_dma.o
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
new file mode 100644
index 000000000000..55ec67997670
--- /dev/null
+++ b/drivers/dma/sirf-dma.c
@@ -0,0 +1,717 @@
+/*
+ * DMA controller driver for CSR SiRFprimaII
+ *
+ * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company.
+ *
+ * Licensed under GPLv2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/sirfsoc_dma.h>
+
+#define SIRFSOC_DMA_DESCRIPTORS                 16
+#define SIRFSOC_DMA_CHANNELS                    16
+
+#define SIRFSOC_DMA_CH_ADDR                     0x00
+#define SIRFSOC_DMA_CH_XLEN                     0x04
+#define SIRFSOC_DMA_CH_YLEN                     0x08
+#define SIRFSOC_DMA_CH_CTRL                     0x0C
+
+#define SIRFSOC_DMA_WIDTH_0                     0x100
+#define SIRFSOC_DMA_CH_VALID                    0x140
+#define SIRFSOC_DMA_CH_INT                      0x144
+#define SIRFSOC_DMA_INT_EN                      0x148
+#define SIRFSOC_DMA_CH_LOOP_CTRL                0x150
+
+#define SIRFSOC_DMA_MODE_CTRL_BIT               4
+#define SIRFSOC_DMA_DIR_CTRL_BIT                5
+
+/* xlen and dma_width register is in 4 bytes boundary */
+#define SIRFSOC_DMA_WORD_LEN			4
+
+struct sirfsoc_dma_desc {
+	struct dma_async_tx_descriptor	desc;
+	struct list_head		node;
+
+	/* SiRFprimaII 2D-DMA parameters */
+
+	int             xlen;           /* DMA xlen */
+	int             ylen;           /* DMA ylen */
+	int             width;          /* DMA width */
+	int             dir;
+	bool            cyclic;         /* is loop DMA? */
+	u32             addr;		/* DMA buffer address */
+};
+
+struct sirfsoc_dma_chan {
+	struct dma_chan			chan;
+	struct list_head		free;
+	struct list_head		prepared;
+	struct list_head		queued;
+	struct list_head		active;
+	struct list_head		completed;
+	dma_cookie_t			completed_cookie;
+	unsigned long			happened_cyclic;
+	unsigned long			completed_cyclic;
+
+	/* Lock for this structure */
+	spinlock_t			lock;
+
+	int				mode;
+};
+
+struct sirfsoc_dma {
+	struct dma_device		dma;
+	struct tasklet_struct		tasklet;
+	struct sirfsoc_dma_chan		channels[SIRFSOC_DMA_CHANNELS];
+	void __iomem			*base;
+	int				irq;
+};
+
+#define DRV_NAME	"sirfsoc_dma"
+
+/* Convert struct dma_chan to struct sirfsoc_dma_chan */
+static inline
+struct sirfsoc_dma_chan *dma_chan_to_sirfsoc_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct sirfsoc_dma_chan, chan);
+}
+
+/* Convert struct dma_chan to struct sirfsoc_dma */
+static inline struct sirfsoc_dma *dma_chan_to_sirfsoc_dma(struct dma_chan *c)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(c);
+	return container_of(schan, struct sirfsoc_dma, channels[c->chan_id]);
+}
+
+/* Execute all queued DMA descriptors */
+static void sirfsoc_dma_execute(struct sirfsoc_dma_chan *schan)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
+	int cid = schan->chan.chan_id;
+	struct sirfsoc_dma_desc *sdesc = NULL;
+
+	/*
+	 * lock has been held by functions calling this, so we don't hold
+	 * lock again
+	 */
+
+	sdesc = list_first_entry(&schan->queued, struct sirfsoc_dma_desc,
+		node);
+	/* Move the first queued descriptor to active list */
+	list_move_tail(&schan->queued, &schan->active);
+
+	/* Start the DMA transfer */
+	writel_relaxed(sdesc->width, sdma->base + SIRFSOC_DMA_WIDTH_0 +
+		cid * 4);
+	writel_relaxed(cid | (schan->mode << SIRFSOC_DMA_MODE_CTRL_BIT) |
+		(sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT),
+		sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL);
+	writel_relaxed(sdesc->xlen, sdma->base + cid * 0x10 +
+		SIRFSOC_DMA_CH_XLEN);
+	writel_relaxed(sdesc->ylen, sdma->base + cid * 0x10 +
+		SIRFSOC_DMA_CH_YLEN);
+	writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) |
+		(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
+
+	/*
+	 * writel has an implict memory write barrier to make sure data is
+	 * flushed into memory before starting DMA
+	 */
+	writel(sdesc->addr >> 2, sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR);
+
+	if (sdesc->cyclic) {
+		writel((1 << cid) | 1 << (cid + 16) |
+			readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL),
+			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+		schan->happened_cyclic = schan->completed_cyclic = 0;
+	}
+}
+
+/* Interrupt handler */
+static irqreturn_t sirfsoc_dma_irq(int irq, void *data)
+{
+	struct sirfsoc_dma *sdma = data;
+	struct sirfsoc_dma_chan *schan;
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	u32 is;
+	int ch;
+
+	is = readl(sdma->base + SIRFSOC_DMA_CH_INT);
+	while ((ch = fls(is) - 1) >= 0) {
+		is &= ~(1 << ch);
+		writel_relaxed(1 << ch, sdma->base + SIRFSOC_DMA_CH_INT);
+		schan = &sdma->channels[ch];
+
+		spin_lock(&schan->lock);
+
+		sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc,
+			node);
+		if (!sdesc->cyclic) {
+			/* Execute queued descriptors */
+			list_splice_tail_init(&schan->active, &schan->completed);
+			if (!list_empty(&schan->queued))
+				sirfsoc_dma_execute(schan);
+		} else
+			schan->happened_cyclic++;
+
+		spin_unlock(&schan->lock);
+	}
+
+	/* Schedule tasklet */
+	tasklet_schedule(&sdma->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/* process completed descriptors */
+static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma)
+{
+	dma_cookie_t last_cookie = 0;
+	struct sirfsoc_dma_chan *schan;
+	struct sirfsoc_dma_desc *sdesc;
+	struct dma_async_tx_descriptor *desc;
+	unsigned long flags;
+	unsigned long happened_cyclic;
+	LIST_HEAD(list);
+	int i;
+
+	for (i = 0; i < sdma->dma.chancnt; i++) {
+		schan = &sdma->channels[i];
+
+		/* Get all completed descriptors */
+		spin_lock_irqsave(&schan->lock, flags);
+		if (!list_empty(&schan->completed)) {
+			list_splice_tail_init(&schan->completed, &list);
+			spin_unlock_irqrestore(&schan->lock, flags);
+
+			/* Execute callbacks and run dependencies */
+			list_for_each_entry(sdesc, &list, node) {
+				desc = &sdesc->desc;
+
+				if (desc->callback)
+					desc->callback(desc->callback_param);
+
+				last_cookie = desc->cookie;
+				dma_run_dependencies(desc);
+			}
+
+			/* Free descriptors */
+			spin_lock_irqsave(&schan->lock, flags);
+			list_splice_tail_init(&list, &schan->free);
+			schan->completed_cookie = last_cookie;
+			spin_unlock_irqrestore(&schan->lock, flags);
+		} else {
+			/* for cyclic channel, desc is always in active list */
+			sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc,
+				node);
+
+			if (!sdesc || (sdesc && !sdesc->cyclic)) {
+				/* without active cyclic DMA */
+				spin_unlock_irqrestore(&schan->lock, flags);
+				continue;
+			}
+
+			/* cyclic DMA */
+			happened_cyclic = schan->happened_cyclic;
+			spin_unlock_irqrestore(&schan->lock, flags);
+
+			desc = &sdesc->desc;
+			while (happened_cyclic != schan->completed_cyclic) {
+				if (desc->callback)
+					desc->callback(desc->callback_param);
+				schan->completed_cyclic++;
+			}
+		}
+	}
+}
+
+/* DMA Tasklet */
+static void sirfsoc_dma_tasklet(unsigned long data)
+{
+	struct sirfsoc_dma *sdma = (void *)data;
+
+	sirfsoc_dma_process_completed(sdma);
+}
+
+/* Submit descriptor to hardware */
+static dma_cookie_t sirfsoc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(txd->chan);
+	struct sirfsoc_dma_desc *sdesc;
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	sdesc = container_of(txd, struct sirfsoc_dma_desc, desc);
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	/* Move descriptor to queue */
+	list_move_tail(&sdesc->node, &schan->queued);
+
+	/* Update cookie */
+	cookie = schan->chan.cookie + 1;
+	if (cookie <= 0)
+		cookie = 1;
+
+	schan->chan.cookie = cookie;
+	sdesc->desc.cookie = cookie;
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return cookie;
+}
+
+static int sirfsoc_dma_slave_config(struct sirfsoc_dma_chan *schan,
+	struct dma_slave_config *config)
+{
+	unsigned long flags;
+
+	if ((config->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) ||
+		(config->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES))
+		return -EINVAL;
+
+	spin_lock_irqsave(&schan->lock, flags);
+	schan->mode = (config->src_maxburst == 4 ? 1 : 0);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return 0;
+}
+
+static int sirfsoc_dma_terminate_all(struct sirfsoc_dma_chan *schan)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
+	int cid = schan->chan.chan_id;
+	unsigned long flags;
+
+	writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) &
+		~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
+	writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID);
+
+	writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL)
+		& ~((1 << cid) | 1 << (cid + 16)),
+			sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+
+	spin_lock_irqsave(&schan->lock, flags);
+	list_splice_tail_init(&schan->active, &schan->free);
+	list_splice_tail_init(&schan->queued, &schan->free);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return 0;
+}
+
+static int sirfsoc_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+	unsigned long arg)
+{
+	struct dma_slave_config *config;
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		return sirfsoc_dma_terminate_all(schan);
+	case DMA_SLAVE_CONFIG:
+		config = (struct dma_slave_config *)arg;
+		return sirfsoc_dma_slave_config(schan, config);
+
+	default:
+		break;
+	}
+
+	return -ENOSYS;
+}
+
+/* Alloc channel resources */
+static int sirfsoc_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc;
+	unsigned long flags;
+	LIST_HEAD(descs);
+	int i;
+
+	/* Alloc descriptors for this channel */
+	for (i = 0; i < SIRFSOC_DMA_DESCRIPTORS; i++) {
+		sdesc = kzalloc(sizeof(*sdesc), GFP_KERNEL);
+		if (!sdesc) {
+			dev_notice(sdma->dma.dev, "Memory allocation error. "
+				"Allocated only %u descriptors\n", i);
+			break;
+		}
+
+		dma_async_tx_descriptor_init(&sdesc->desc, chan);
+		sdesc->desc.flags = DMA_CTRL_ACK;
+		sdesc->desc.tx_submit = sirfsoc_dma_tx_submit;
+
+		list_add_tail(&sdesc->node, &descs);
+	}
+
+	/* Return error only if no descriptors were allocated */
+	if (i == 0)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	list_splice_tail_init(&descs, &schan->free);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return i;
+}
+
+/* Free channel resources */
+static void sirfsoc_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc, *tmp;
+	unsigned long flags;
+	LIST_HEAD(descs);
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	/* Channel must be idle */
+	BUG_ON(!list_empty(&schan->prepared));
+	BUG_ON(!list_empty(&schan->queued));
+	BUG_ON(!list_empty(&schan->active));
+	BUG_ON(!list_empty(&schan->completed));
+
+	/* Move data */
+	list_splice_tail_init(&schan->free, &descs);
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	/* Free descriptors */
+	list_for_each_entry_safe(sdesc, tmp, &descs, node)
+		kfree(sdesc);
+}
+
+/* Send pending descriptor to hardware */
+static void sirfsoc_dma_issue_pending(struct dma_chan *chan)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	if (list_empty(&schan->active) && !list_empty(&schan->queued))
+		sirfsoc_dma_execute(schan);
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+}
+
+/* Check request completion status */
+static enum dma_status
+sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+	struct dma_tx_state *txstate)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	unsigned long flags;
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+
+	spin_lock_irqsave(&schan->lock, flags);
+	last_used = schan->chan.cookie;
+	last_complete = schan->completed_cookie;
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	dma_set_tx_state(txstate, last_complete, last_used, 0);
+	return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static struct dma_async_tx_descriptor *sirfsoc_dma_prep_interleaved(
+	struct dma_chan *chan, struct dma_interleaved_template *xt,
+	unsigned long flags)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	unsigned long iflags;
+	int ret;
+
+	if ((xt->dir != DMA_MEM_TO_DEV) || (xt->dir != DMA_DEV_TO_MEM)) {
+		ret = -EINVAL;
+		goto err_dir;
+	}
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&schan->lock, iflags);
+	if (!list_empty(&schan->free)) {
+		sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc,
+			node);
+		list_del(&sdesc->node);
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	if (!sdesc) {
+		/* try to free completed descriptors */
+		sirfsoc_dma_process_completed(sdma);
+		ret = 0;
+		goto no_desc;
+	}
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&schan->lock, iflags);
+
+	/*
+	 * Number of chunks in a frame can only be 1 for prima2
+	 * and ylen (number of frame - 1) must be at least 0
+	 */
+	if ((xt->frame_size == 1) && (xt->numf > 0)) {
+		sdesc->cyclic = 0;
+		sdesc->xlen = xt->sgl[0].size / SIRFSOC_DMA_WORD_LEN;
+		sdesc->width = (xt->sgl[0].size + xt->sgl[0].icg) /
+				SIRFSOC_DMA_WORD_LEN;
+		sdesc->ylen = xt->numf - 1;
+		if (xt->dir == DMA_MEM_TO_DEV) {
+			sdesc->addr = xt->src_start;
+			sdesc->dir = 1;
+		} else {
+			sdesc->addr = xt->dst_start;
+			sdesc->dir = 0;
+		}
+
+		list_add_tail(&sdesc->node, &schan->prepared);
+	} else {
+		pr_err("sirfsoc DMA Invalid xfer\n");
+		ret = -EINVAL;
+		goto err_xfer;
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	return &sdesc->desc;
+err_xfer:
+	spin_unlock_irqrestore(&schan->lock, iflags);
+no_desc:
+err_dir:
+	return ERR_PTR(ret);
+}
+
+static struct dma_async_tx_descriptor *
+sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr,
+	size_t buf_len, size_t period_len,
+	enum dma_transfer_direction direction)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	unsigned long iflags;
+
+	/*
+	 * we only support cycle transfer with 2 period
+	 * If the X-length is set to 0, it would be the loop mode.
+	 * The DMA address keeps increasing until reaching the end of a loop
+	 * area whose size is defined by (DMA_WIDTH x (Y_LENGTH + 1)). Then
+	 * the DMA address goes back to the beginning of this area.
+	 * In loop mode, the DMA data region is divided into two parts, BUFA
+	 * and BUFB. DMA controller generates interrupts twice in each loop:
+	 * when the DMA address reaches the end of BUFA or the end of the
+	 * BUFB
+	 */
+	if (buf_len !=  2 * period_len)
+		return ERR_PTR(-EINVAL);
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&schan->lock, iflags);
+	if (!list_empty(&schan->free)) {
+		sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc,
+			node);
+		list_del(&sdesc->node);
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	if (!sdesc)
+		return 0;
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&schan->lock, iflags);
+	sdesc->addr = addr;
+	sdesc->cyclic = 1;
+	sdesc->xlen = 0;
+	sdesc->ylen = buf_len / SIRFSOC_DMA_WORD_LEN - 1;
+	sdesc->width = 1;
+	list_add_tail(&sdesc->node, &schan->prepared);
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	return &sdesc->desc;
+}
+
+/*
+ * The DMA controller consists of 16 independent DMA channels.
+ * Each channel is allocated to a different function
+ */
+bool sirfsoc_dma_filter_id(struct dma_chan *chan, void *chan_id)
+{
+	unsigned int ch_nr = (unsigned int) chan_id;
+
+	if (ch_nr == chan->chan_id +
+		chan->device->dev_id * SIRFSOC_DMA_CHANNELS)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(sirfsoc_dma_filter_id);
+
+static int __devinit sirfsoc_dma_probe(struct platform_device *op)
+{
+	struct device_node *dn = op->dev.of_node;
+	struct device *dev = &op->dev;
+	struct dma_device *dma;
+	struct sirfsoc_dma *sdma;
+	struct sirfsoc_dma_chan *schan;
+	struct resource res;
+	ulong regs_start, regs_size;
+	u32 id;
+	int ret, i;
+
+	sdma = devm_kzalloc(dev, sizeof(*sdma), GFP_KERNEL);
+	if (!sdma) {
+		dev_err(dev, "Memory exhausted!\n");
+		return -ENOMEM;
+	}
+
+	if (of_property_read_u32(dn, "cell-index", &id)) {
+		dev_err(dev, "Fail to get DMAC index\n");
+		ret = -ENODEV;
+		goto free_mem;
+	}
+
+	sdma->irq = irq_of_parse_and_map(dn, 0);
+	if (sdma->irq == NO_IRQ) {
+		dev_err(dev, "Error mapping IRQ!\n");
+		ret = -EINVAL;
+		goto free_mem;
+	}
+
+	ret = of_address_to_resource(dn, 0, &res);
+	if (ret) {
+		dev_err(dev, "Error parsing memory region!\n");
+		goto free_mem;
+	}
+
+	regs_start = res.start;
+	regs_size = resource_size(&res);
+
+	sdma->base = devm_ioremap(dev, regs_start, regs_size);
+	if (!sdma->base) {
+		dev_err(dev, "Error mapping memory region!\n");
+		ret = -ENOMEM;
+		goto irq_dispose;
+	}
+
+	ret = devm_request_irq(dev, sdma->irq, &sirfsoc_dma_irq, 0, DRV_NAME,
+		sdma);
+	if (ret) {
+		dev_err(dev, "Error requesting IRQ!\n");
+		ret = -EINVAL;
+		goto unmap_mem;
+	}
+
+	dma = &sdma->dma;
+	dma->dev = dev;
+	dma->chancnt = SIRFSOC_DMA_CHANNELS;
+
+	dma->device_alloc_chan_resources = sirfsoc_dma_alloc_chan_resources;
+	dma->device_free_chan_resources = sirfsoc_dma_free_chan_resources;
+	dma->device_issue_pending = sirfsoc_dma_issue_pending;
+	dma->device_control = sirfsoc_dma_control;
+	dma->device_tx_status = sirfsoc_dma_tx_status;
+	dma->device_prep_interleaved_dma = sirfsoc_dma_prep_interleaved;
+	dma->device_prep_dma_cyclic = sirfsoc_dma_prep_cyclic;
+
+	INIT_LIST_HEAD(&dma->channels);
+	dma_cap_set(DMA_SLAVE, dma->cap_mask);
+	dma_cap_set(DMA_CYCLIC, dma->cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, dma->cap_mask);
+	dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+
+	for (i = 0; i < dma->chancnt; i++) {
+		schan = &sdma->channels[i];
+
+		schan->chan.device = dma;
+		schan->chan.cookie = 1;
+		schan->completed_cookie = schan->chan.cookie;
+
+		INIT_LIST_HEAD(&schan->free);
+		INIT_LIST_HEAD(&schan->prepared);
+		INIT_LIST_HEAD(&schan->queued);
+		INIT_LIST_HEAD(&schan->active);
+		INIT_LIST_HEAD(&schan->completed);
+
+		spin_lock_init(&schan->lock);
+		list_add_tail(&schan->chan.device_node, &dma->channels);
+	}
+
+	tasklet_init(&sdma->tasklet, sirfsoc_dma_tasklet, (unsigned long)sdma);
+
+	/* Register DMA engine */
+	dev_set_drvdata(dev, sdma);
+	ret = dma_async_device_register(dma);
+	if (ret)
+		goto free_irq;
+
+	dev_info(dev, "initialized SIRFSOC DMAC driver\n");
+
+	return 0;
+
+free_irq:
+	devm_free_irq(dev, sdma->irq, sdma);
+irq_dispose:
+	irq_dispose_mapping(sdma->irq);
+unmap_mem:
+	iounmap(sdma->base);
+free_mem:
+	devm_kfree(dev, sdma);
+	return ret;
+}
+
+static int __devexit sirfsoc_dma_remove(struct platform_device *op)
+{
+	struct device *dev = &op->dev;
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+
+	dma_async_device_unregister(&sdma->dma);
+	devm_free_irq(dev, sdma->irq, sdma);
+	irq_dispose_mapping(sdma->irq);
+	iounmap(sdma->base);
+	devm_kfree(dev, sdma);
+	return 0;
+}
+
+static struct of_device_id sirfsoc_dma_match[] = {
+	{ .compatible = "sirf,prima2-dmac", },
+	{},
+};
+
+static struct platform_driver sirfsoc_dma_driver = {
+	.probe		= sirfsoc_dma_probe,
+	.remove		= __devexit_p(sirfsoc_dma_remove),
+	.driver = {
+		.name = DRV_NAME,
+		.owner = THIS_MODULE,
+		.of_match_table	= sirfsoc_dma_match,
+	},
+};
+
+static int __init sirfsoc_dma_init(void)
+{
+	return platform_driver_register(&sirfsoc_dma_driver);
+}
+module_init(sirfsoc_dma_init);
+
+static void __exit sirfsoc_dma_exit(void)
+{
+	platform_driver_unregister(&sirfsoc_dma_driver);
+}
+module_exit(sirfsoc_dma_exit);
+
+MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>, "
+	"Barry Song <baohua.song@csr.com>");
+MODULE_DESCRIPTION("SIRFSOC DMA control driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/sirfsoc_dma.h b/include/linux/sirfsoc_dma.h
new file mode 100644
index 000000000000..29d959333d81
--- /dev/null
+++ b/include/linux/sirfsoc_dma.h
@@ -0,0 +1,6 @@
+#ifndef _SIRFSOC_DMA_H_
+#define _SIRFSOC_DMA_H_
+
+bool sirfsoc_dma_filter_id(struct dma_chan *chan, void *chan_id);
+
+#endif
-- 
cgit v1.2.3


From 62268ce9170c5466332c046ff6ddafcb67751502 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Tue, 13 Dec 2011 23:48:03 +0800
Subject: dmaengine: add DMA_TRANS_NONE to dma_transfer_direction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before dma_transfer_direction was introduced to replace
dma_data_direction, some dmaengine device uses DMA_NONE of
dma_data_direction for some talk with its client drivers.
The mxs-dma and its clients mxs-mmc and gpmi-nand are such case.

This patch adds DMA_TRANS_NONE to dma_transfer_direction and
migrate the DMA_NONE use in mxs-dma to it.

It also fixes the compile warning below.

CC      drivers/dma/mxs-dma.o
drivers/dma/mxs-dma.c: In function ‘mxs_dma_prep_slave_sg’:
drivers/dma/mxs-dma.c:420:16: warning: comparison between ‘enum dma_transfer_direction’ and ‘enum dma_data_direction’

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/mxs-dma.c     | 2 +-
 include/linux/dmaengine.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index 6548595c26dc..493af2f6e33a 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -391,7 +391,7 @@ static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
 		idx = 0;
 	}
 
-	if (direction == DMA_NONE) {
+	if (direction == DMA_TRANS_NONE) {
 		ccw = &mxs_chan->ccw[idx++];
 		pio = (u32 *) sgl;
 
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 5532bb8b500c..679b349d9b66 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -88,6 +88,7 @@ enum dma_transfer_direction {
 	DMA_MEM_TO_DEV,
 	DMA_DEV_TO_MEM,
 	DMA_DEV_TO_DEV,
+	DMA_TRANS_NONE,
 };
 
 /**
-- 
cgit v1.2.3


From 4e82786f7039cb707c031dcf0dc84c025e149487 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Tue, 13 Dec 2011 23:48:05 +0800
Subject: mtd: fix compile error for gpmi-nand

The driver gpmi-nand should compile at least.  This patch adds the
missing gpmi-nand.h to fix the compile error below.

  CC      drivers/mtd/nand/gpmi-nand/gpmi-nand.o
  CC      drivers/mtd/nand/gpmi-nand/gpmi-lib.o
drivers/mtd/nand/gpmi-nand/gpmi-nand.c:25:33: fatal error: linux/mtd/gpmi-nand.h: No such file or directory
drivers/mtd/nand/gpmi-nand/gpmi-lib.c:21:33: fatal error: linux/mtd/gpmi-nand.h: No such file or directory

This header is grabbed from patch below, which has not been postponed
for merging.

  [PATCH v8 1/4] ARM: mxs: add GPMI-NAND support for imx23/imx28
  http://permalink.gmane.org/gmane.linux.drivers.mtd/37338

Signed-off-by: Huang Shijie <b32955@freescale.com>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 include/linux/mtd/gpmi-nand.h | 68 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 include/linux/mtd/gpmi-nand.h

(limited to 'include/linux')

diff --git a/include/linux/mtd/gpmi-nand.h b/include/linux/mtd/gpmi-nand.h
new file mode 100644
index 000000000000..69b6dbf46b5e
--- /dev/null
+++ b/include/linux/mtd/gpmi-nand.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef __MACH_MXS_GPMI_NAND_H__
+#define __MACH_MXS_GPMI_NAND_H__
+
+/* The size of the resources is fixed. */
+#define GPMI_NAND_RES_SIZE	6
+
+/* Resource names for the GPMI NAND driver. */
+#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME  "GPMI NAND GPMI Registers"
+#define GPMI_NAND_GPMI_INTERRUPT_RES_NAME  "GPMI NAND GPMI Interrupt"
+#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME   "GPMI NAND BCH Registers"
+#define GPMI_NAND_BCH_INTERRUPT_RES_NAME   "GPMI NAND BCH Interrupt"
+#define GPMI_NAND_DMA_CHANNELS_RES_NAME    "GPMI NAND DMA Channels"
+#define GPMI_NAND_DMA_INTERRUPT_RES_NAME   "GPMI NAND DMA Interrupt"
+
+/**
+ * struct gpmi_nand_platform_data - GPMI NAND driver platform data.
+ *
+ * This structure communicates platform-specific information to the GPMI NAND
+ * driver that can't be expressed as resources.
+ *
+ * @platform_init:           A pointer to a function the driver will call to
+ *                           initialize the platform (e.g., set up the pin mux).
+ * @min_prop_delay_in_ns:    Minimum propagation delay of GPMI signals to and
+ *                           from the NAND Flash device, in nanoseconds.
+ * @max_prop_delay_in_ns:    Maximum propagation delay of GPMI signals to and
+ *                           from the NAND Flash device, in nanoseconds.
+ * @max_chip_count:          The maximum number of chips for which the driver
+ *                           should configure the hardware. This value most
+ *                           likely reflects the number of pins that are
+ *                           connected to a NAND Flash device. If this is
+ *                           greater than the SoC hardware can support, the
+ *                           driver will print a message and fail to initialize.
+ * @partitions:              An optional pointer to an array of partition
+ *                           descriptions.
+ * @partition_count:         The number of elements in the partitions array.
+ */
+struct gpmi_nand_platform_data {
+	/* SoC hardware information. */
+	int		(*platform_init)(void);
+
+	/* NAND Flash information. */
+	unsigned int	min_prop_delay_in_ns;
+	unsigned int	max_prop_delay_in_ns;
+	unsigned int	max_chip_count;
+
+	/* Medium information. */
+	struct		mtd_partition *partitions;
+	unsigned	partition_count;
+};
+#endif
-- 
cgit v1.2.3


From c11b46c32c8a9bf05fdb76d70d8dc74fcbfd02d1 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 4 Jan 2012 15:34:17 +0100
Subject: dma: shdma: fix runtime PM: clear channel buffers on reset

On platforms, supporting power domains, if the domain, containing a DMAC
instance is powered down, the driver fails to resume correctly. On those
platforms DMAC channels have an additional CHCLR register for clearing
channel buffers. Using this register during runtime resume fixes the
problem.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/shdma.c    | 47 +++++++++++++++++++++++++++++++----------------
 include/linux/sh_dma.h |  2 ++
 2 files changed, 33 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 592304fb41a6..54043cd831c8 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -56,6 +56,15 @@ static LIST_HEAD(sh_dmae_devices);
 static unsigned long sh_dmae_slave_used[BITS_TO_LONGS(SH_DMA_SLAVE_NUMBER)];
 
 static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all);
+static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan);
+
+static void chclr_write(struct sh_dmae_chan *sh_dc, u32 data)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+	__raw_writel(data, shdev->chan_reg +
+		     shdev->pdata->channel[sh_dc->id].chclr_offset);
+}
 
 static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
 {
@@ -128,6 +137,15 @@ static int sh_dmae_rst(struct sh_dmae_device *shdev)
 
 	dmaor = dmaor_read(shdev) & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME);
 
+	if (shdev->pdata->chclr_present) {
+		int i;
+		for (i = 0; i < shdev->pdata->channel_num; i++) {
+			struct sh_dmae_chan *sh_chan = shdev->chan[i];
+			if (sh_chan)
+				chclr_write(sh_chan, 0);
+		}
+	}
+
 	dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init);
 
 	dmaor = dmaor_read(shdev);
@@ -138,6 +156,10 @@ static int sh_dmae_rst(struct sh_dmae_device *shdev)
 		dev_warn(shdev->common.dev, "Can't initialize DMAOR.\n");
 		return -EIO;
 	}
+	if (shdev->pdata->dmaor_init & ~dmaor)
+		dev_warn(shdev->common.dev,
+			 "DMAOR=0x%x hasn't latched the initial value 0x%x.\n",
+			 dmaor, shdev->pdata->dmaor_init);
 	return 0;
 }
 
@@ -258,8 +280,6 @@ static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
 	return 0;
 }
 
-static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan);
-
 static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c;
@@ -339,6 +359,8 @@ static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
 				sh_chan_xfer_ld_queue(sh_chan);
 			sh_chan->pm_state = DMAE_PM_ESTABLISHED;
 		}
+	} else {
+		sh_chan->pm_state = DMAE_PM_PENDING;
 	}
 
 	spin_unlock_irq(&sh_chan->desc_lock);
@@ -1224,6 +1246,8 @@ static int __init sh_dmae_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, shdev);
 
+	shdev->common.dev = &pdev->dev;
+
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_get_sync(&pdev->dev);
 
@@ -1253,7 +1277,6 @@ static int __init sh_dmae_probe(struct platform_device *pdev)
 	shdev->common.device_prep_slave_sg = sh_dmae_prep_slave_sg;
 	shdev->common.device_control = sh_dmae_control;
 
-	shdev->common.dev = &pdev->dev;
 	/* Default transfer size of 32 bytes requires 32-byte alignment */
 	shdev->common.copy_align = LOG2_DEFAULT_XFER_SIZE;
 
@@ -1434,22 +1457,17 @@ static int sh_dmae_runtime_resume(struct device *dev)
 #ifdef CONFIG_PM
 static int sh_dmae_suspend(struct device *dev)
 {
-	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
-	int i;
-
-	for (i = 0; i < shdev->pdata->channel_num; i++) {
-		struct sh_dmae_chan *sh_chan = shdev->chan[i];
-		if (sh_chan->descs_allocated)
-			sh_chan->pm_error = pm_runtime_put_sync(dev);
-	}
-
 	return 0;
 }
 
 static int sh_dmae_resume(struct device *dev)
 {
 	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
-	int i;
+	int i, ret;
+
+	ret = sh_dmae_rst(shdev);
+	if (ret < 0)
+		dev_err(dev, "Failed to reset!\n");
 
 	for (i = 0; i < shdev->pdata->channel_num; i++) {
 		struct sh_dmae_chan *sh_chan = shdev->chan[i];
@@ -1458,9 +1476,6 @@ static int sh_dmae_resume(struct device *dev)
 		if (!sh_chan->descs_allocated)
 			continue;
 
-		if (!sh_chan->pm_error)
-			pm_runtime_get_sync(dev);
-
 		if (param) {
 			const struct sh_dmae_slave_config *cfg = param->config;
 			dmae_set_dmars(sh_chan, cfg->mid_rid);
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
index 62ef6938da10..8cd7fe59cf1a 100644
--- a/include/linux/sh_dma.h
+++ b/include/linux/sh_dma.h
@@ -48,6 +48,7 @@ struct sh_dmae_channel {
 	unsigned int	offset;
 	unsigned int	dmars;
 	unsigned int	dmars_bit;
+	unsigned int	chclr_offset;
 };
 
 struct sh_dmae_pdata {
@@ -68,6 +69,7 @@ struct sh_dmae_pdata {
 	unsigned int dmaor_is_32bit:1;
 	unsigned int needs_tend_set:1;
 	unsigned int no_dmars:1;
+	unsigned int chclr_present:1;
 };
 
 /* DMA register */
-- 
cgit v1.2.3


From 46fe44ce8777f087aa8ad4a2605fdcfb9c2d63af Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 16 Nov 2011 15:03:59 +0100
Subject: quota: Pass information that quota is stored in system file to
 userspace

Quota tools need to know whether quota is stored in a system file or in
classical aquota.{user|group} files. So pass this information as a flag
in GETINFO quotactl.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c      | 8 +++++---
 include/linux/quota.h | 6 +++++-
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 5ec59b20cf76..46741970371b 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2125,6 +2125,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 		mutex_unlock(&dqopt->dqio_mutex);
 		goto out_file_init;
 	}
+	if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
+		dqopt->info[type].dqi_flags |= DQF_SYS_FILE;
 	mutex_unlock(&dqopt->dqio_mutex);
 	spin_lock(&dq_state_lock);
 	dqopt->flags |= dquot_state_flag(flags, type);
@@ -2464,7 +2466,7 @@ int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 	spin_lock(&dq_data_lock);
 	ii->dqi_bgrace = mi->dqi_bgrace;
 	ii->dqi_igrace = mi->dqi_igrace;
-	ii->dqi_flags = mi->dqi_flags & DQF_MASK;
+	ii->dqi_flags = mi->dqi_flags & DQF_GETINFO_MASK;
 	ii->dqi_valid = IIF_ALL;
 	spin_unlock(&dq_data_lock);
 	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
@@ -2490,8 +2492,8 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 	if (ii->dqi_valid & IIF_IGRACE)
 		mi->dqi_igrace = ii->dqi_igrace;
 	if (ii->dqi_valid & IIF_FLAGS)
-		mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) |
-				(ii->dqi_flags & DQF_MASK);
+		mi->dqi_flags = (mi->dqi_flags & ~DQF_SETINFO_MASK) |
+				(ii->dqi_flags & DQF_SETINFO_MASK);
 	spin_unlock(&dq_data_lock);
 	mark_info_dirty(sb, type);
 	/* Force write to disk */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index cb7855699037..c09fa042b5ea 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -230,7 +230,11 @@ struct mem_dqinfo {
 struct super_block;
 
 #define DQF_MASK 0xffff		/* Mask for format specific flags */
-#define DQF_INFO_DIRTY_B 16
+#define DQF_GETINFO_MASK 0x1ffff	/* Mask for flags passed to userspace */
+#define DQF_SETINFO_MASK 0xffff		/* Mask for flags modifiable from userspace */
+#define DQF_SYS_FILE_B		16
+#define DQF_SYS_FILE (1 << DQF_SYS_FILE_B)	/* Quota file stored as system file */
+#define DQF_INFO_DIRTY_B	31
 #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B)	/* Is info dirty? */
 
 extern void mark_info_dirty(struct super_block *sb, int type);
-- 
cgit v1.2.3


From 7c7c7f01cc5e3e423120a4848a73dd5e4586f2f9 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 11 Jan 2012 19:30:38 +0000
Subject: vhost-net: add module alias (v2.1)

By adding some module aliases, programs (or users) won't have to explicitly
call modprobe. Vhost-net will always be available if built into the kernel.
It does require assigning a permanent minor number for depmod to work.

Also:
  - use C99 style initialization.
  - add missing entry in documentation for loop-control

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-By: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devices.txt  | 3 +++
 drivers/vhost/net.c        | 8 +++++---
 include/linux/miscdevice.h | 1 +
 3 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devices.txt b/Documentation/devices.txt
index cec8864ce4e8..00383186d8fb 100644
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -447,6 +447,9 @@ Your cooperation is appreciated.
 		234 = /dev/btrfs-control	Btrfs control device
 		235 = /dev/autofs	Autofs control device
 		236 = /dev/mapper/control	Device-Mapper control device
+		237 = /dev/loop-control Loopback control device
+		238 = /dev/vhost-net	Host kernel accelerator for virtio net
+
 		240-254			Reserved for local use
 		255			Reserved for MISC_DYNAMIC_MINOR
 
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 882a51fe7b3c..9dab1f51dd43 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -856,9 +856,9 @@ static const struct file_operations vhost_net_fops = {
 };
 
 static struct miscdevice vhost_net_misc = {
-	MISC_DYNAMIC_MINOR,
-	"vhost-net",
-	&vhost_net_fops,
+	.minor = VHOST_NET_MINOR,
+	.name = "vhost-net",
+	.fops = &vhost_net_fops,
 };
 
 static int vhost_net_init(void)
@@ -879,3 +879,5 @@ MODULE_VERSION("0.0.1");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Michael S. Tsirkin");
 MODULE_DESCRIPTION("Host kernel accelerator for virtio net");
+MODULE_ALIAS_MISCDEV(VHOST_NET_MINOR);
+MODULE_ALIAS("devname:vhost-net");
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 32085249e9cb..0549d2115507 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -42,6 +42,7 @@
 #define AUTOFS_MINOR		235
 #define MAPPER_CTRL_MINOR	236
 #define LOOP_CTRL_MINOR		237
+#define VHOST_NET_MINOR		238
 #define MISC_DYNAMIC_MINOR	255
 
 struct device;
-- 
cgit v1.2.3


From 9bf04646b0b41c5438ed8a27c5f8dbe0ff40d756 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 15 Jan 2012 16:57:12 +0100
Subject: netfilter: revert user-space expectation helper support

This patch partially reverts:
3d058d7 netfilter: rework user-space expectation helper support
that was applied during the 3.2 development cycle.

After this patch, the tree remains just like before patch bc01bef,
that initially added the preliminary infrastructure.

I decided to partially revert this patch because the approach
that I proposed to resolve this problem is broken in NAT setups.
Moreover, a new infrastructure will be submitted for the 3.3.x
development cycle that resolve the existing issues while
providing a neat solution.

Since nobody has been seriously using this infrastructure in
user-space, the removal of this feature should affect any know
FOSS project (to my knowledge).

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h |  4 ----
 include/linux/netfilter/xt_CT.h               |  3 +--
 net/netfilter/nf_conntrack_helper.c           | 12 ------------
 net/netfilter/nf_conntrack_netlink.c          |  4 ----
 net/netfilter/xt_CT.c                         |  8 +++-----
 5 files changed, 4 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 9e3a2838291b..0d3dd66322ec 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -83,10 +83,6 @@ enum ip_conntrack_status {
 	/* Conntrack is a fake untracked entry */
 	IPS_UNTRACKED_BIT = 12,
 	IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
-
-	/* Conntrack has a userspace helper. */
-	IPS_USERSPACE_HELPER_BIT = 13,
-	IPS_USERSPACE_HELPER = (1 << IPS_USERSPACE_HELPER_BIT),
 };
 
 /* Connection tracking event types */
diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h
index 6390f0992f36..b56e76811c04 100644
--- a/include/linux/netfilter/xt_CT.h
+++ b/include/linux/netfilter/xt_CT.h
@@ -3,8 +3,7 @@
 
 #include <linux/types.h>
 
-#define XT_CT_NOTRACK		0x1
-#define XT_CT_USERSPACE_HELPER	0x2
+#define XT_CT_NOTRACK	0x1
 
 struct xt_ct_target_info {
 	__u16 flags;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 299fec91f741..bbe23baa19b6 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -121,18 +121,6 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
 	int ret = 0;
 
 	if (tmpl != NULL) {
-		/* we've got a userspace helper. */
-		if (tmpl->status & IPS_USERSPACE_HELPER) {
-			help = nf_ct_helper_ext_add(ct, flags);
-			if (help == NULL) {
-				ret = -ENOMEM;
-				goto out;
-			}
-			rcu_assign_pointer(help->helper, NULL);
-			__set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
-			ret = 0;
-			goto out;
-		}
 		help = nfct_help(tmpl);
 		if (help != NULL)
 			helper = help->helper;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2a4834b83332..9307b033c0c9 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2042,10 +2042,6 @@ ctnetlink_create_expect(struct net *net, u16 zone,
 	}
 	help = nfct_help(ct);
 	if (!help) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
-	if (test_bit(IPS_USERSPACE_HELPER_BIT, &ct->status)) {
 		if (!cda[CTA_EXPECT_TIMEOUT]) {
 			err = -EINVAL;
 			goto out;
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 8e87123f1373..0221d10de75a 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -62,8 +62,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 	int ret = 0;
 	u8 proto;
 
-	if (info->flags & ~(XT_CT_NOTRACK | XT_CT_USERSPACE_HELPER))
-		return -EOPNOTSUPP;
+	if (info->flags & ~XT_CT_NOTRACK)
+		return -EINVAL;
 
 	if (info->flags & XT_CT_NOTRACK) {
 		ct = nf_ct_untracked_get();
@@ -92,9 +92,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 				  GFP_KERNEL))
 		goto err3;
 
-	if (info->flags & XT_CT_USERSPACE_HELPER) {
-		__set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
-	} else if (info->helper[0]) {
+	if (info->helper[0]) {
 		ret = -ENOENT;
 		proto = xt_ct_find_proto(par);
 		if (!proto) {
-- 
cgit v1.2.3


From 7061ca3b6c99fc78115560b9a10227c8c5fafc45 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 20 Dec 2011 08:20:46 -0800
Subject: sched: Add "const" to is_idle_task() parameter

This patch fixes a build warning in -next due to a const pointer being
passed to is_idle_task().  Because is_idle_task() does not modify anything,
this commit adds the "const" to is_idle_task()'s argument declaration.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4a7e4d333a27..56fa25a5b1eb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2074,7 +2074,7 @@ extern struct task_struct *idle_task(int cpu);
  * is_idle_task - is the specified task an idle task?
  * @tsk: the task in question.
  */
-static inline bool is_idle_task(struct task_struct *p)
+static inline bool is_idle_task(const struct task_struct *p)
 {
 	return p->pid == 0;
 }
-- 
cgit v1.2.3


From b54ac6d2a25084667da781c7ca2cebef52a2bcdd Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Thu, 8 Dec 2011 11:25:49 +0800
Subject: ACPI, Record ACPI NVS regions

Some firmware will access memory in ACPI NVS region via APEI.  That
is, instructions in APEI ERST/EINJ table will read/write ACPI NVS
region.  The original resource conflict checking in APEI code will
check memory/ioport accessed by APEI via general resource management
mechanism.  But ACPI NVS region is marked as busy already, so that the
false resource conflict will prevent APEI ERST/EINJ to work.

To fix this, this patch record ACPI NVS regions, so that we can avoid
request resources for memory region inside it.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/e820.c |  4 ++--
 drivers/acpi/Makefile  |  3 ++-
 drivers/acpi/nvs.c     | 53 +++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/acpi.h   | 20 +++++++++++++------
 4 files changed, 70 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 303a0e48f076..51c3b186e5b9 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -714,7 +714,7 @@ void __init e820_mark_nosave_regions(unsigned long limit_pfn)
 }
 #endif
 
-#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_ACPI
 /**
  * Mark ACPI NVS memory region, so that we can save/restore it during
  * hibernation and the subsequent resume.
@@ -727,7 +727,7 @@ static int __init e820_mark_nvs_memory(void)
 		struct e820entry *ei = &e820.map[i];
 
 		if (ei->type == E820_NVS)
-			suspend_nvs_register(ei->addr, ei->size);
+			acpi_nvs_register(ei->addr, ei->size);
 	}
 
 	return 0;
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index ecb26b4f29a0..c07f44f05f9d 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -20,11 +20,12 @@ obj-y				+= acpi.o \
 # All the builtin files are in the "acpi." module_param namespace.
 acpi-y				+= osl.o utils.o reboot.o
 acpi-y				+= atomicio.o
+acpi-y				+= nvs.o
 
 # sleep related files
 acpi-y				+= wakeup.o
 acpi-y				+= sleep.o
-acpi-$(CONFIG_ACPI_SLEEP)	+= proc.o nvs.o
+acpi-$(CONFIG_ACPI_SLEEP)	+= proc.o
 
 
 #
diff --git a/drivers/acpi/nvs.c b/drivers/acpi/nvs.c
index 096787b43c96..7a2035fa8c71 100644
--- a/drivers/acpi/nvs.c
+++ b/drivers/acpi/nvs.c
@@ -15,6 +15,56 @@
 #include <linux/acpi_io.h>
 #include <acpi/acpiosxf.h>
 
+/* ACPI NVS regions, APEI may use it */
+
+struct nvs_region {
+	__u64 phys_start;
+	__u64 size;
+	struct list_head node;
+};
+
+static LIST_HEAD(nvs_region_list);
+
+#ifdef CONFIG_ACPI_SLEEP
+static int suspend_nvs_register(unsigned long start, unsigned long size);
+#else
+static inline int suspend_nvs_register(unsigned long a, unsigned long b)
+{
+	return 0;
+}
+#endif
+
+int acpi_nvs_register(__u64 start, __u64 size)
+{
+	struct nvs_region *region;
+
+	region = kmalloc(sizeof(*region), GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+	region->phys_start = start;
+	region->size = size;
+	list_add_tail(&region->node, &nvs_region_list);
+
+	return suspend_nvs_register(start, size);
+}
+
+int acpi_nvs_for_each_region(int (*func)(__u64 start, __u64 size, void *data),
+			     void *data)
+{
+	int rc;
+	struct nvs_region *region;
+
+	list_for_each_entry(region, &nvs_region_list, node) {
+		rc = func(region->phys_start, region->size, data);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+
+#ifdef CONFIG_ACPI_SLEEP
 /*
  * Platforms, like ACPI, may want us to save some memory used by them during
  * suspend and to restore the contents of this memory during the subsequent
@@ -41,7 +91,7 @@ static LIST_HEAD(nvs_list);
  *	things so that the data from page-aligned addresses in this region will
  *	be copied into separate RAM pages.
  */
-int suspend_nvs_register(unsigned long start, unsigned long size)
+static int suspend_nvs_register(unsigned long start, unsigned long size)
 {
 	struct nvs_page *entry, *next;
 
@@ -159,3 +209,4 @@ void suspend_nvs_restore(void)
 		if (entry->data)
 			memcpy(entry->kaddr, entry->data, entry->size);
 }
+#endif
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6001b4da39dd..26b75442ff7a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -306,6 +306,11 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
 					     u32 *mask, u32 req);
 extern void acpi_early_init(void);
 
+extern int acpi_nvs_register(__u64 start, __u64 size);
+
+extern int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *),
+				    void *data);
+
 #else	/* !CONFIG_ACPI */
 
 #define acpi_disabled 1
@@ -348,15 +353,18 @@ static inline int acpi_table_parse(char *id,
 {
 	return -1;
 }
-#endif	/* !CONFIG_ACPI */
 
-#ifdef CONFIG_ACPI_SLEEP
-int suspend_nvs_register(unsigned long start, unsigned long size);
-#else
-static inline int suspend_nvs_register(unsigned long a, unsigned long b)
+static inline int acpi_nvs_register(__u64 start, __u64 size)
 {
 	return 0;
 }
-#endif
+
+static inline int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *),
+					   void *data)
+{
+	return 0;
+}
+
+#endif	/* !CONFIG_ACPI */
 
 #endif	/*_LINUX_ACPI_H*/
-- 
cgit v1.2.3


From 6f68c91c55ea3576d366797fa8d45e31c4aa79f8 Mon Sep 17 00:00:00 2001
From: Myron Stowe <mstowe@redhat.com>
Date: Mon, 7 Nov 2011 16:23:34 -0700
Subject: ACPI: Export interfaces for ioremapping/iounmapping ACPI registers

Export remapping and unmapping interfaces - acpi_os_map_generic_address()
and acpi_os_unmap_generic_address() - for ACPI generic registers that are
backed by memory mapped I/O (MMIO).

The acpi_os_map_generic_address() and acpi_os_unmap_generic_address()
declarations may more properly belong in include/acpi/acpiosxf.h next to
acpi_os_read_memory() but I believe that would require the ACPI CA making
them an official part of the ACPI CA - OS interface.

ACPI Generic Address Structure (GAS) reference (ACPI's fixed/generic
hardware registers use the GAS format):
  ACPI Specification, Revision 4.0, Section 5.2.3.1, "Generic Address
  Structure"

Signed-off-by: Myron Stowe <myron.stowe@redhat.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/acpi/osl.c      | 6 ++++--
 include/linux/acpi_io.h | 3 +++
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 2e285cdbefb1..b11f2676f7c9 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -431,7 +431,7 @@ void __init early_acpi_os_unmap_memory(void __iomem *virt, acpi_size size)
 		__acpi_unmap_table(virt, size);
 }
 
-static int acpi_os_map_generic_address(struct acpi_generic_address *gas)
+int acpi_os_map_generic_address(struct acpi_generic_address *gas)
 {
 	u64 addr;
 	void __iomem *virt;
@@ -450,8 +450,9 @@ static int acpi_os_map_generic_address(struct acpi_generic_address *gas)
 
 	return 0;
 }
+EXPORT_SYMBOL(acpi_os_map_generic_address);
 
-static void acpi_os_unmap_generic_address(struct acpi_generic_address *gas)
+void acpi_os_unmap_generic_address(struct acpi_generic_address *gas)
 {
 	u64 addr;
 	struct acpi_ioremap *map;
@@ -475,6 +476,7 @@ static void acpi_os_unmap_generic_address(struct acpi_generic_address *gas)
 
 	acpi_os_map_cleanup(map);
 }
+EXPORT_SYMBOL(acpi_os_unmap_generic_address);
 
 #ifdef ACPI_FUTURE_USAGE
 acpi_status
diff --git a/include/linux/acpi_io.h b/include/linux/acpi_io.h
index 4afd7102459d..b0ffa219993e 100644
--- a/include/linux/acpi_io.h
+++ b/include/linux/acpi_io.h
@@ -12,4 +12,7 @@ static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
 
 void __iomem *acpi_os_get_iomem(acpi_physical_address phys, unsigned int size);
 
+int acpi_os_map_generic_address(struct acpi_generic_address *addr);
+void acpi_os_unmap_generic_address(struct acpi_generic_address *addr);
+
 #endif
-- 
cgit v1.2.3


From 7d5869e78f4c9d32f834dadefbb7dcb3c9d4d85f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 13 Jan 2012 23:58:41 +0100
Subject: bcma: connect the bcma bus suspend/resume to the bcma driver
 suspend/resume

Now the low-level driver actually gets informed that it is getting suspended and resumed.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/main.c                                   | 19 +++++++++++++++++++
 drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c |  2 +-
 include/linux/bcma/bcma.h                             |  2 +-
 3 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index b711d9d634a7..febbc0a1222a 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -243,6 +243,16 @@ int __init bcma_bus_early_register(struct bcma_bus *bus,
 #ifdef CONFIG_PM
 int bcma_bus_suspend(struct bcma_bus *bus)
 {
+	struct bcma_device *core;
+
+	list_for_each_entry(core, &bus->cores, list) {
+		struct device_driver *drv = core->dev.driver;
+		if (drv) {
+			struct bcma_driver *adrv = container_of(drv, struct bcma_driver, drv);
+			if (adrv->suspend)
+				adrv->suspend(core);
+		}
+	}
 	return 0;
 }
 
@@ -257,6 +267,15 @@ int bcma_bus_resume(struct bcma_bus *bus)
 		bcma_core_chipcommon_init(&bus->drv_cc);
 	}
 
+	list_for_each_entry(core, &bus->cores, list) {
+		struct device_driver *drv = core->dev.driver;
+		if (drv) {
+			struct bcma_driver *adrv = container_of(drv, struct bcma_driver, drv);
+			if (adrv->resume)
+				adrv->resume(core);
+		}
+	}
+
 	return 0;
 }
 #endif
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
index d106576ce338..213130afdaf7 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
@@ -1135,7 +1135,7 @@ static int brcms_pci_suspend(struct pci_dev *pdev)
 	return pci_set_power_state(pdev, PCI_D3hot);
 }
 
-static int brcms_suspend(struct bcma_device *pdev, pm_message_t state)
+static int brcms_suspend(struct bcma_device *pdev)
 {
 	struct brcms_info *wl;
 	struct ieee80211_hw *hw;
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index f4b8346b1a33..83c209f39493 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -162,7 +162,7 @@ struct bcma_driver {
 
 	int (*probe)(struct bcma_device *dev);
 	void (*remove)(struct bcma_device *dev);
-	int (*suspend)(struct bcma_device *dev, pm_message_t state);
+	int (*suspend)(struct bcma_device *dev);
 	int (*resume)(struct bcma_device *dev);
 	void (*shutdown)(struct bcma_device *dev);
 
-- 
cgit v1.2.3


From 20c300b10c358daa507be335aec6aa3987ef425a Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 17 Jan 2012 12:54:01 +0400
Subject: tty: remove unused tty_driver->termios_locked

This field is unused since 2.6.28 (commit fe6e29fdb1a7: "tty: simplify
ktermios allocation", to be exact)

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tty_driver.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index ecdaeb98b293..5cf685086dd3 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -312,7 +312,6 @@ struct tty_driver {
 	 */
 	struct tty_struct **ttys;
 	struct ktermios **termios;
-	struct ktermios **termios_locked;
 	void *driver_state;
 
 	/*
-- 
cgit v1.2.3


From 85e7bac33b8d5edafc4e219c7dfdb3d48e0b4e31 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:05 -0500
Subject: seccomp: audit abnormal end to a process due to seccomp

The audit system likes to collect information about processes that end
abnormally (SIGSEGV) as this may me useful intrusion detection information.
This patch adds audit support to collect information when seccomp forces a
task to exit because of misbehavior in a similar way.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  8 ++++++++
 kernel/auditsc.c      | 50 +++++++++++++++++++++++++++++---------------------
 kernel/seccomp.c      |  2 ++
 3 files changed, 39 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 426ab9f4dd85..6e1c533f9b46 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -430,6 +430,7 @@ extern void audit_putname(const char *name);
 extern void __audit_inode(const char *name, const struct dentry *dentry);
 extern void __audit_inode_child(const struct dentry *dentry,
 				const struct inode *parent);
+extern void __audit_seccomp(unsigned long syscall);
 extern void __audit_ptrace(struct task_struct *t);
 
 static inline int audit_dummy_context(void)
@@ -453,6 +454,12 @@ static inline void audit_inode_child(const struct dentry *dentry,
 }
 void audit_core_dumps(long signr);
 
+static inline void audit_seccomp(unsigned long syscall)
+{
+	if (unlikely(!audit_dummy_context()))
+		__audit_seccomp(syscall);
+}
+
 static inline void audit_ptrace(struct task_struct *t)
 {
 	if (unlikely(!audit_dummy_context()))
@@ -558,6 +565,7 @@ extern int audit_signals;
 #define audit_inode(n,d) do { (void)(d); } while (0)
 #define audit_inode_child(i,p) do { ; } while (0)
 #define audit_core_dumps(i) do { ; } while (0)
+#define audit_seccomp(i) do { ; } while (0)
 #define auditsc_get_stamp(c,t,s) (0)
 #define audit_get_loginuid(t) (-1)
 #define audit_get_sessionid(t) (-1)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7c495147c3d9..e9bcb93800d8 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2529,6 +2529,25 @@ void __audit_mmap_fd(int fd, int flags)
 	context->type = AUDIT_MMAP;
 }
 
+static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
+{
+	uid_t auid, uid;
+	gid_t gid;
+	unsigned int sessionid;
+
+	auid = audit_get_loginuid(current);
+	sessionid = audit_get_sessionid(current);
+	current_uid_gid(&uid, &gid);
+
+	audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u",
+			 auid, uid, gid, sessionid);
+	audit_log_task_context(ab);
+	audit_log_format(ab, " pid=%d comm=", current->pid);
+	audit_log_untrustedstring(ab, current->comm);
+	audit_log_format(ab, " reason=");
+	audit_log_string(ab, reason);
+	audit_log_format(ab, " sig=%ld", signr);
+}
 /**
  * audit_core_dumps - record information about processes that end abnormally
  * @signr: signal value
@@ -2539,10 +2558,6 @@ void __audit_mmap_fd(int fd, int flags)
 void audit_core_dumps(long signr)
 {
 	struct audit_buffer *ab;
-	u32 sid;
-	uid_t auid = audit_get_loginuid(current), uid;
-	gid_t gid;
-	unsigned int sessionid = audit_get_sessionid(current);
 
 	if (!audit_enabled)
 		return;
@@ -2551,24 +2566,17 @@ void audit_core_dumps(long signr)
 		return;
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
-	current_uid_gid(&uid, &gid);
-	audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u",
-			 auid, uid, gid, sessionid);
-	security_task_getsecid(current, &sid);
-	if (sid) {
-		char *ctx = NULL;
-		u32 len;
+	audit_log_abend(ab, "memory violation", signr);
+	audit_log_end(ab);
+}
 
-		if (security_secid_to_secctx(sid, &ctx, &len))
-			audit_log_format(ab, " ssid=%u", sid);
-		else {
-			audit_log_format(ab, " subj=%s", ctx);
-			security_release_secctx(ctx, len);
-		}
-	}
-	audit_log_format(ab, " pid=%d comm=", current->pid);
-	audit_log_untrustedstring(ab, current->comm);
-	audit_log_format(ab, " sig=%ld", signr);
+void __audit_seccomp(unsigned long syscall)
+{
+	struct audit_buffer *ab;
+
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
+	audit_log_abend(ab, "seccomp", SIGKILL);
+	audit_log_format(ab, " syscall=%ld", syscall);
 	audit_log_end(ab);
 }
 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 57d4b13b631d..e8d76c5895ea 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -6,6 +6,7 @@
  * This defines a simple but solid secure-computing mode.
  */
 
+#include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/sched.h>
 #include <linux/compat.h>
@@ -54,6 +55,7 @@ void __secure_computing(int this_syscall)
 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
+	audit_seccomp(this_syscall);
 	do_exit(SIGKILL);
 }
 
-- 
cgit v1.2.3


From d7e7528bcd456f5c36ad4a202ccfb43c5aa98bc4 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:06 -0500
Subject: Audit: push audit success and retcode into arch ptrace.h

The audit system previously expected arches calling to audit_syscall_exit to
supply as arguments if the syscall was a success and what the return code was.
Audit also provides a helper AUDITSC_RESULT which was supposed to simplify things
by converting from negative retcodes to an audit internal magic value stating
success or failure.  This helper was wrong and could indicate that a valid
pointer returned to userspace was a failed syscall.  The fix is to fix the
layering foolishness.  We now pass audit_syscall_exit a struct pt_reg and it
in turns calls back into arch code to collect the return value and to
determine if the syscall was a success or failure.  We also define a generic
is_syscall_success() macro which determines success/failure based on if the
value is < -MAX_ERRNO.  This works for arches like x86 which do not use a
separate mechanism to indicate syscall failure.

We make both the is_syscall_success() and regs_return_value() static inlines
instead of macros.  The reason is because the audit function must take a void*
for the regs.  (uml calls theirs struct uml_pt_regs instead of just struct
pt_regs so audit_syscall_exit can't take a struct pt_regs).  Since the audit
function takes a void* we need to use static inlines to cast it back to the
arch correct structure to dereference it.

The other major change is that on some arches, like ia64, MIPS and ppc, we
change regs_return_value() to give us the negative value on syscall failure.
THE only other user of this macro, kretprobe_example.c, won't notice and it
makes the value signed consistently for the audit functions across all archs.

In arch/sh/kernel/ptrace_64.c I see that we were using regs[9] in the old
audit code as the return value.  But the ptrace_64.h code defined the macro
regs_return_value() as regs[3].  I have no idea which one is correct, but this
patch now uses the regs_return_value() function, so it now uses regs[3].

For powerpc we previously used regs->result but now use the
regs_return_value() function which uses regs->gprs[3].  regs->gprs[3] is
always positive so the regs_return_value(), much like ia64 makes it negative
before calling the audit code when appropriate.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: H. Peter Anvin <hpa@zytor.com> [for x86 portion]
Acked-by: Tony Luck <tony.luck@intel.com> [for ia64]
Acked-by: Richard Weinberger <richard@nod.at> [for uml]
Acked-by: David S. Miller <davem@davemloft.net> [for sparc]
Acked-by: Ralf Baechle <ralf@linux-mips.org> [for mips]
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> [for ppc]
---
 arch/ia64/include/asm/ptrace.h       | 13 ++++++++++++-
 arch/ia64/kernel/ptrace.c            |  9 +--------
 arch/microblaze/include/asm/ptrace.h |  5 +++++
 arch/microblaze/kernel/ptrace.c      |  3 +--
 arch/mips/include/asm/ptrace.h       | 14 +++++++++++++-
 arch/mips/kernel/ptrace.c            |  4 +---
 arch/powerpc/include/asm/ptrace.h    | 13 ++++++++++++-
 arch/powerpc/kernel/ptrace.c         |  4 +---
 arch/s390/include/asm/ptrace.h       |  6 +++++-
 arch/s390/kernel/ptrace.c            |  4 +---
 arch/sh/include/asm/ptrace_32.h      |  5 ++++-
 arch/sh/include/asm/ptrace_64.h      |  5 ++++-
 arch/sh/kernel/ptrace_32.c           |  4 +---
 arch/sh/kernel/ptrace_64.c           |  4 +---
 arch/sparc/include/asm/ptrace.h      | 10 +++++++++-
 arch/sparc/kernel/ptrace_64.c        | 11 +----------
 arch/um/kernel/ptrace.c              |  4 ++--
 arch/x86/ia32/ia32entry.S            | 10 +++++-----
 arch/x86/kernel/entry_32.S           |  8 ++++----
 arch/x86/kernel/entry_64.S           | 10 +++++-----
 arch/x86/kernel/ptrace.c             |  3 +--
 arch/x86/kernel/vm86_32.c            |  4 ++--
 arch/x86/um/shared/sysdep/ptrace.h   |  5 +++++
 include/linux/audit.h                | 22 ++++++++++++++--------
 include/linux/ptrace.h               | 10 ++++++++++
 kernel/auditsc.c                     | 16 ++++++++++++----
 26 files changed, 132 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h
index f5cb27614e35..68c98f5b3ca6 100644
--- a/arch/ia64/include/asm/ptrace.h
+++ b/arch/ia64/include/asm/ptrace.h
@@ -246,7 +246,18 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
 	return regs->ar_bspstore;
 }
 
-#define regs_return_value(regs) ((regs)->r8)
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	return regs->r10 != -1;
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	if (is_syscall_success(regs))
+		return regs->r8;
+	else
+		return -regs->r8;
+}
 
 /* Conserve space in histogram by encoding slot bits in address
  * bits 2 and 3 rather than bits 0 and 1.
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 8848f43d819e..2c154088cce7 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1268,14 +1268,7 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
 {
 	int step;
 
-	if (unlikely(current->audit_context)) {
-		int success = AUDITSC_RESULT(regs.r10);
-		long result = regs.r8;
-
-		if (success != AUDITSC_SUCCESS)
-			result = -result;
-		audit_syscall_exit(success, result);
-	}
+	audit_syscall_exit(&regs);
 
 	step = test_thread_flag(TIF_SINGLESTEP);
 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
diff --git a/arch/microblaze/include/asm/ptrace.h b/arch/microblaze/include/asm/ptrace.h
index 816bee64b196..94e92c805859 100644
--- a/arch/microblaze/include/asm/ptrace.h
+++ b/arch/microblaze/include/asm/ptrace.h
@@ -61,6 +61,11 @@ struct pt_regs {
 #define instruction_pointer(regs)	((regs)->pc)
 #define profile_pc(regs)		instruction_pointer(regs)
 
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->r3;
+}
+
 #else /* __KERNEL__ */
 
 /* pt_regs offsets used by gdbserver etc in ptrace syscalls */
diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c
index 043cb58f9c44..f564b1bfd386 100644
--- a/arch/microblaze/kernel/ptrace.c
+++ b/arch/microblaze/kernel/ptrace.c
@@ -159,8 +159,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
 {
 	int step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->r3), regs->r3);
+	audit_syscall_exit(regs);
 
 	step = test_thread_flag(TIF_SINGLESTEP);
 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index de39b1f343ea..7d409505df2d 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -137,7 +137,19 @@ extern int ptrace_set_watch_regs(struct task_struct *child,
  */
 #define user_mode(regs) (((regs)->cp0_status & KU_MASK) == KU_USER)
 
-#define regs_return_value(_regs) ((_regs)->regs[2])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	return !regs->regs[7];
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	if (is_syscall_success(regs))
+		return regs->regs[2];
+	else
+		return -regs->regs[2];
+}
+
 #define instruction_pointer(regs) ((regs)->cp0_epc)
 #define profile_pc(regs) instruction_pointer(regs)
 
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 4e6ea1ffad46..ab0f1963a7bd 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -572,9 +572,7 @@ out:
  */
 asmlinkage void syscall_trace_leave(struct pt_regs *regs)
 {
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->regs[7]),
-		                   -regs->regs[2]);
+	audit_syscall_exit(regs);
 
 	if (!(current->ptrace & PT_PTRACED))
 		return;
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 48223f9b8728..78a205162fd7 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -86,7 +86,18 @@ struct pt_regs {
 #define instruction_pointer(regs) ((regs)->nip)
 #define user_stack_pointer(regs) ((regs)->gpr[1])
 #define kernel_stack_pointer(regs) ((regs)->gpr[1])
-#define regs_return_value(regs) ((regs)->gpr[3])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	return !(regs->ccr & 0x10000000);
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	if (is_syscall_success(regs))
+		return regs->gpr[3];
+	else
+		return -regs->gpr[3];
+}
 
 #ifdef CONFIG_SMP
 extern unsigned long profile_pc(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 5de73dbd15c7..09d31c12a5e3 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1748,9 +1748,7 @@ void do_syscall_trace_leave(struct pt_regs *regs)
 {
 	int step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit((regs->ccr&0x10000000)?AUDITSC_FAILURE:AUDITSC_SUCCESS,
-				   regs->result);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->result);
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 56da355678f4..aeb77f017985 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -541,9 +541,13 @@ struct user_regs_struct
 #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
 #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
 #define user_stack_pointer(regs)((regs)->gprs[15])
-#define regs_return_value(regs)((regs)->gprs[2])
 #define profile_pc(regs) instruction_pointer(regs)
 
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->gprs[2];
+}
+
 int regs_query_register_offset(const char *name);
 const char *regs_query_register_name(unsigned int offset);
 unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 573bc29551ef..f52758600980 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -751,9 +751,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 
 asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
 {
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]),
-				   regs->gprs[2]);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->gprs[2]);
diff --git a/arch/sh/include/asm/ptrace_32.h b/arch/sh/include/asm/ptrace_32.h
index 6c2239cca1a2..2d3e906aa722 100644
--- a/arch/sh/include/asm/ptrace_32.h
+++ b/arch/sh/include/asm/ptrace_32.h
@@ -76,7 +76,10 @@ struct pt_dspregs {
 #ifdef __KERNEL__
 
 #define MAX_REG_OFFSET		offsetof(struct pt_regs, tra)
-#define regs_return_value(_regs)	((_regs)->regs[0])
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->regs[0];
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sh/include/asm/ptrace_64.h b/arch/sh/include/asm/ptrace_64.h
index bf9be7764d69..eb3fcceaf64b 100644
--- a/arch/sh/include/asm/ptrace_64.h
+++ b/arch/sh/include/asm/ptrace_64.h
@@ -13,7 +13,10 @@ struct pt_regs {
 #ifdef __KERNEL__
 
 #define MAX_REG_OFFSET		offsetof(struct pt_regs, tregs[7])
-#define regs_return_value(_regs)	((_regs)->regs[3])
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->regs[3];
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 92b3c276339a..c0b5c179d27b 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -530,9 +530,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
 {
 	int step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->regs[0]),
-				   regs->regs[0]);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->regs[0]);
diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index c8f97649f354..ba720d686435 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -548,9 +548,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
 {
 	int step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->regs[9]),
-				   regs->regs[9]);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->regs[9]);
diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h
index a0e1bcf843a1..c00c3b5c2806 100644
--- a/arch/sparc/include/asm/ptrace.h
+++ b/arch/sparc/include/asm/ptrace.h
@@ -207,7 +207,15 @@ do {	current_thread_info()->syscall_noerror = 1; \
 #define instruction_pointer(regs) ((regs)->tpc)
 #define instruction_pointer_set(regs, val) ((regs)->tpc = (val))
 #define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
-#define regs_return_value(regs) ((regs)->u_regs[UREG_I0])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	return !(regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY));
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->u_regs[UREG_I0];
+}
 #ifdef CONFIG_SMP
 extern unsigned long profile_pc(struct pt_regs *);
 #else
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index 96ee50a80661..c73c8c50f117 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -1086,17 +1086,8 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 
 asmlinkage void syscall_trace_leave(struct pt_regs *regs)
 {
-#ifdef CONFIG_AUDITSYSCALL
-	if (unlikely(current->audit_context)) {
-		unsigned long tstate = regs->tstate;
-		int result = AUDITSC_SUCCESS;
+	audit_syscall_exit(regs);
 
-		if (unlikely(tstate & (TSTATE_XCARRY | TSTATE_ICARRY)))
-			result = AUDITSC_FAILURE;
-
-		audit_syscall_exit(result, regs->u_regs[UREG_I0]);
-	}
-#endif
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->u_regs[UREG_G1]);
 
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index c9da32b0c707..2ccf25c42feb 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -175,8 +175,8 @@ void syscall_trace(struct uml_pt_regs *regs, int entryexit)
 					    UPT_SYSCALL_ARG2(regs),
 					    UPT_SYSCALL_ARG3(regs),
 					    UPT_SYSCALL_ARG4(regs));
-		else audit_syscall_exit(AUDITSC_RESULT(UPT_SYSCALL_RET(regs)),
-					UPT_SYSCALL_RET(regs));
+		else
+			audit_syscall_exit(regs);
 	}
 
 	/* Fake a debug trap */
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 3e274564f6bf..64ced0b8f8fd 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -14,6 +14,7 @@
 #include <asm/segment.h>
 #include <asm/irqflags.h>
 #include <linux/linkage.h>
+#include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -208,12 +209,11 @@ sysexit_from_sys_call:
 	TRACE_IRQS_ON
 	sti
 	movl %eax,%esi		/* second arg, syscall return value */
-	cmpl $0,%eax		/* is it < 0? */
-	setl %al		/* 1 if so, 0 if not */
+	cmpl $-MAX_ERRNO,%eax	/* is it an error ? */
+	setbe %al		/* 1 if so, 0 if not */
 	movzbl %al,%edi		/* zero-extend that into %edi */
-	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-	call audit_syscall_exit
-	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall return value */
+	call __audit_syscall_exit
+	movq RAX-ARGOFFSET(%rsp),%rax	/* reload syscall return value */
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	cli
 	TRACE_IRQS_OFF
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 22d0e21b4dd7..a22facf06f0e 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -42,6 +42,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/err.h>
 #include <asm/thread_info.h>
 #include <asm/irqflags.h>
 #include <asm/errno.h>
@@ -466,11 +467,10 @@ sysexit_audit:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_ANY)
 	movl %eax,%edx		/* second arg, syscall return value */
-	cmpl $0,%eax		/* is it < 0? */
-	setl %al		/* 1 if so, 0 if not */
+	cmpl $-MAX_ERRNO,%eax	/* is it an error ? */
+	setbe %al		/* 1 if so, 0 if not */
 	movzbl %al,%eax		/* zero-extend that */
-	inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-	call audit_syscall_exit
+	call __audit_syscall_exit
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a20e1cb9dc87..e51393dd93a3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -55,6 +55,7 @@
 #include <asm/paravirt.h>
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
+#include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
 #include <linux/elf-em.h>
@@ -563,17 +564,16 @@ auditsys:
 	jmp system_call_fastpath
 
 	/*
-	 * Return fast path for syscall audit.  Call audit_syscall_exit()
+	 * Return fast path for syscall audit.  Call __audit_syscall_exit()
 	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
 	 * masked off.
 	 */
 sysret_audit:
 	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
-	cmpq $0,%rsi		/* is it < 0? */
-	setl %al		/* 1 if so, 0 if not */
+	cmpq $-MAX_ERRNO,%rsi	/* is it < -MAX_ERRNO? */
+	setbe %al		/* 1 if so, 0 if not */
 	movzbl %al,%edi		/* zero-extend that into %edi */
-	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-	call audit_syscall_exit
+	call __audit_syscall_exit
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	jmp sysret_check
 #endif	/* CONFIG_AUDITSYSCALL */
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 89a04c7b5bb6..8b0218758775 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1414,8 +1414,7 @@ void syscall_trace_leave(struct pt_regs *regs)
 {
 	bool step;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
+	audit_syscall_exit(regs);
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->ax);
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 863f8753ab0a..af17e1c966dc 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -335,9 +335,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 	if (info->flags & VM86_SCREEN_BITMAP)
 		mark_screen_rdonly(tsk->mm);
 
-	/*call audit_syscall_exit since we do not exit via the normal paths */
+	/*call __audit_syscall_exit since we do not exit via the normal paths */
 	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(0), 0);
+		__audit_syscall_exit(1, 0);
 
 	__asm__ __volatile__(
 		"movl %0,%%esp\n\t"
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
index 711b1621747f..5ef9344a8b24 100644
--- a/arch/x86/um/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -3,3 +3,8 @@
 #else
 #include "ptrace_64.h"
 #endif
+
+static inline long regs_return_value(struct uml_pt_regs *regs)
+{
+	return UPT_SYSCALL_RET(regs);
+}
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 6e1c533f9b46..3d65e4b3ba06 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -26,6 +26,7 @@
 
 #include <linux/types.h>
 #include <linux/elf-em.h>
+#include <linux/ptrace.h>
 
 /* The netlink messages for the audit system is divided into blocks:
  * 1000 - 1099 are for commanding the audit system
@@ -408,10 +409,6 @@ struct audit_field {
 	void				*lsm_rule;
 };
 
-#define AUDITSC_INVALID 0
-#define AUDITSC_SUCCESS 1
-#define AUDITSC_FAILURE 2
-#define AUDITSC_RESULT(x) ( ((long)(x))<0?AUDITSC_FAILURE:AUDITSC_SUCCESS )
 extern int __init audit_register_class(int class, unsigned *list);
 extern int audit_classify_syscall(int abi, unsigned syscall);
 extern int audit_classify_arch(int arch);
@@ -424,7 +421,7 @@ extern void audit_free(struct task_struct *task);
 extern void audit_syscall_entry(int arch,
 				int major, unsigned long a0, unsigned long a1,
 				unsigned long a2, unsigned long a3);
-extern void audit_syscall_exit(int failed, long return_code);
+extern void __audit_syscall_exit(int ret_success, long ret_value);
 extern void __audit_getname(const char *name);
 extern void audit_putname(const char *name);
 extern void __audit_inode(const char *name, const struct dentry *dentry);
@@ -438,6 +435,15 @@ static inline int audit_dummy_context(void)
 	void *p = current->audit_context;
 	return !p || *(int *)p;
 }
+static inline void audit_syscall_exit(void *pt_regs)
+{
+	if (unlikely(current->audit_context)) {
+		int success = is_syscall_success(pt_regs);
+		int return_code = regs_return_value(pt_regs);
+
+		__audit_syscall_exit(success, return_code);
+	}
+}
 static inline void audit_getname(const char *name)
 {
 	if (unlikely(!audit_dummy_context()))
@@ -551,12 +557,12 @@ static inline void audit_mmap_fd(int fd, int flags)
 
 extern int audit_n_rules;
 extern int audit_signals;
-#else
+#else /* CONFIG_AUDITSYSCALL */
 #define audit_finish_fork(t)
 #define audit_alloc(t) ({ 0; })
 #define audit_free(t) do { ; } while (0)
 #define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0)
-#define audit_syscall_exit(f,r) do { ; } while (0)
+#define audit_syscall_exit(r) do { ; } while (0)
 #define audit_dummy_context() 1
 #define audit_getname(n) do { ; } while (0)
 #define audit_putname(n) do { ; } while (0)
@@ -587,7 +593,7 @@ extern int audit_signals;
 #define audit_ptrace(t) ((void)0)
 #define audit_n_rules 0
 #define audit_signals 0
-#endif
+#endif /* CONFIG_AUDITSYSCALL */
 
 #ifdef CONFIG_AUDIT
 /* These are defined in audit.c */
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 800f113bea66..dd4cefa6519d 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -112,6 +112,7 @@
 
 #include <linux/compiler.h>		/* For unlikely.  */
 #include <linux/sched.h>		/* For struct task_struct.  */
+#include <linux/err.h>			/* for IS_ERR_VALUE */
 
 
 extern long arch_ptrace(struct task_struct *child, long request,
@@ -265,6 +266,15 @@ static inline void ptrace_release_task(struct task_struct *task)
 #define force_successful_syscall_return() do { } while (0)
 #endif
 
+#ifndef is_syscall_success
+/*
+ * On most systems we can tell if a syscall is a success based on if the retval
+ * is an error value.  On some systems like ia64 and powerpc they have different
+ * indicators of success/failure and must define their own.
+ */
+#define is_syscall_success(regs) (!IS_ERR_VALUE((unsigned long)(regs_return_value(regs))))
+#endif
+
 /*
  * <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__.
  *
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e9bcb93800d8..3d2853808185 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -70,6 +70,11 @@
 
 #include "audit.h"
 
+/* flags stating the success for a syscall */
+#define AUDITSC_INVALID 0
+#define AUDITSC_SUCCESS 1
+#define AUDITSC_FAILURE 2
+
 /* AUDIT_NAMES is the number of slots we reserve in the audit_context
  * for saving names from getname().  If we get more names we will allocate
  * a name dynamically and also add those to the list anchored by names_list. */
@@ -1724,8 +1729,7 @@ void audit_finish_fork(struct task_struct *child)
 
 /**
  * audit_syscall_exit - deallocate audit context after a system call
- * @valid: success/failure flag
- * @return_code: syscall return value
+ * @pt_regs: syscall registers
  *
  * Tear down after system call.  If the audit context has been marked as
  * auditable (either because of the AUDIT_RECORD_CONTEXT state from
@@ -1733,13 +1737,17 @@ void audit_finish_fork(struct task_struct *child)
  * message), then write out the syscall information.  In call cases,
  * free the names stored from getname().
  */
-void audit_syscall_exit(int valid, long return_code)
+void __audit_syscall_exit(int success, long return_code)
 {
 	struct task_struct *tsk = current;
 	struct audit_context *context;
 
-	context = audit_get_context(tsk, valid, return_code);
+	if (success)
+		success = AUDITSC_SUCCESS;
+	else
+		success = AUDITSC_FAILURE;
 
+	context = audit_get_context(tsk, success, return_code);
 	if (likely(!context))
 		return;
 
-- 
cgit v1.2.3


From b05d8447e7821695bc2fa3359431f7a664232743 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:06 -0500
Subject: audit: inline audit_syscall_entry to reduce burden on archs

Every arch calls:

if (unlikely(current->audit_context))
	audit_syscall_entry()

which requires knowledge about audit (the existance of audit_context) in
the arch code.  Just do it all in static inline in audit.h so that arch's
can remain blissfully ignorant.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 arch/ia64/kernel/ptrace.c       |  9 +--------
 arch/microblaze/kernel/ptrace.c |  6 ++----
 arch/mips/kernel/ptrace.c       |  7 +++----
 arch/powerpc/kernel/ptrace.c    | 26 ++++++++++++--------------
 arch/s390/kernel/ptrace.c       | 11 +++++------
 arch/sh/kernel/ptrace_32.c      |  7 +++----
 arch/sh/kernel/ptrace_64.c      |  7 +++----
 arch/sparc/kernel/ptrace_64.c   | 17 ++++++++---------
 arch/um/kernel/ptrace.c         | 20 +++++++++-----------
 arch/x86/ia32/ia32entry.S       |  2 +-
 arch/x86/kernel/entry_32.S      |  2 +-
 arch/x86/kernel/entry_64.S      |  4 ++--
 arch/x86/kernel/ptrace.c        | 22 ++++++++++------------
 arch/xtensa/kernel/ptrace.c     |  3 +--
 include/linux/audit.h           | 13 ++++++++++---
 kernel/auditsc.c                |  2 +-
 16 files changed, 72 insertions(+), 86 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 2c154088cce7..dad91661ddf9 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1246,15 +1246,8 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
 	if (test_thread_flag(TIF_RESTORE_RSE))
 		ia64_sync_krbs();
 
-	if (unlikely(current->audit_context)) {
-		long syscall;
-		int arch;
 
-		syscall = regs.r15;
-		arch = AUDIT_ARCH_IA64;
-
-		audit_syscall_entry(arch, syscall, arg0, arg1, arg2, arg3);
-	}
+	audit_syscall_entry(AUDIT_ARCH_IA64, regs.r15, arg0, arg1, arg2, arg3);
 
 	return 0;
 }
diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c
index f564b1bfd386..6eb2aa927d89 100644
--- a/arch/microblaze/kernel/ptrace.c
+++ b/arch/microblaze/kernel/ptrace.c
@@ -147,10 +147,8 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 		 */
 		ret = -1L;
 
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(EM_MICROBLAZE, regs->r12,
-				    regs->r5, regs->r6,
-				    regs->r7, regs->r8);
+	audit_syscall_entry(EM_MICROBLAZE, regs->r12, regs->r5, regs->r6,
+			    regs->r7, regs->r8);
 
 	return ret ?: regs->r12;
 }
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index ab0f1963a7bd..7786b608d932 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -560,10 +560,9 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs)
 	}
 
 out:
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(audit_arch(), regs->regs[2],
-				    regs->regs[4], regs->regs[5],
-				    regs->regs[6], regs->regs[7]);
+	audit_syscall_entry(audit_arch(), regs->regs[2],
+			    regs->regs[4], regs->regs[5],
+			    regs->regs[6], regs->regs[7]);
 }
 
 /*
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 09d31c12a5e3..5b43325402bc 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1724,22 +1724,20 @@ long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->gpr[0]);
 
-	if (unlikely(current->audit_context)) {
 #ifdef CONFIG_PPC64
-		if (!is_32bit_task())
-			audit_syscall_entry(AUDIT_ARCH_PPC64,
-					    regs->gpr[0],
-					    regs->gpr[3], regs->gpr[4],
-					    regs->gpr[5], regs->gpr[6]);
-		else
+	if (!is_32bit_task())
+		audit_syscall_entry(AUDIT_ARCH_PPC64,
+				    regs->gpr[0],
+				    regs->gpr[3], regs->gpr[4],
+				    regs->gpr[5], regs->gpr[6]);
+	else
 #endif
-			audit_syscall_entry(AUDIT_ARCH_PPC,
-					    regs->gpr[0],
-					    regs->gpr[3] & 0xffffffff,
-					    regs->gpr[4] & 0xffffffff,
-					    regs->gpr[5] & 0xffffffff,
-					    regs->gpr[6] & 0xffffffff);
-	}
+		audit_syscall_entry(AUDIT_ARCH_PPC,
+				    regs->gpr[0],
+				    regs->gpr[3] & 0xffffffff,
+				    regs->gpr[4] & 0xffffffff,
+				    regs->gpr[5] & 0xffffffff,
+				    regs->gpr[6] & 0xffffffff);
 
 	return ret ?: regs->gpr[0];
 }
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index f52758600980..9d82ed4bcb27 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -740,12 +740,11 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->gprs[2]);
 
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(is_compat_task() ?
-					AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
-				    regs->gprs[2], regs->orig_gpr2,
-				    regs->gprs[3], regs->gprs[4],
-				    regs->gprs[5]);
+	audit_syscall_entry(is_compat_task() ?
+				AUDIT_ARCH_S390 : AUDIT_ARCH_S390X,
+			    regs->gprs[2], regs->orig_gpr2,
+			    regs->gprs[3], regs->gprs[4],
+			    regs->gprs[5]);
 	return ret ?: regs->gprs[2];
 }
 
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index c0b5c179d27b..a3e651563763 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -518,10 +518,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->regs[0]);
 
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(audit_arch(), regs->regs[3],
-				    regs->regs[4], regs->regs[5],
-				    regs->regs[6], regs->regs[7]);
+	audit_syscall_entry(audit_arch(), regs->regs[3],
+			    regs->regs[4], regs->regs[5],
+			    regs->regs[6], regs->regs[7]);
 
 	return ret ?: regs->regs[0];
 }
diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index ba720d686435..3d0080b5c976 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -536,10 +536,9 @@ asmlinkage long long do_syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->regs[9]);
 
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(audit_arch(), regs->regs[1],
-				    regs->regs[2], regs->regs[3],
-				    regs->regs[4], regs->regs[5]);
+	audit_syscall_entry(audit_arch(), regs->regs[1],
+			    regs->regs[2], regs->regs[3],
+			    regs->regs[4], regs->regs[5]);
 
 	return ret ?: regs->regs[9];
 }
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
index c73c8c50f117..9388844cd88c 100644
--- a/arch/sparc/kernel/ptrace_64.c
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -1071,15 +1071,14 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->u_regs[UREG_G1]);
 
-	if (unlikely(current->audit_context) && !ret)
-		audit_syscall_entry((test_thread_flag(TIF_32BIT) ?
-				     AUDIT_ARCH_SPARC :
-				     AUDIT_ARCH_SPARC64),
-				    regs->u_regs[UREG_G1],
-				    regs->u_regs[UREG_I0],
-				    regs->u_regs[UREG_I1],
-				    regs->u_regs[UREG_I2],
-				    regs->u_regs[UREG_I3]);
+	audit_syscall_entry((test_thread_flag(TIF_32BIT) ?
+			     AUDIT_ARCH_SPARC :
+			     AUDIT_ARCH_SPARC64),
+			    regs->u_regs[UREG_G1],
+			    regs->u_regs[UREG_I0],
+			    regs->u_regs[UREG_I1],
+			    regs->u_regs[UREG_I2],
+			    regs->u_regs[UREG_I3]);
 
 	return ret;
 }
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 2ccf25c42feb..06b190390505 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -167,17 +167,15 @@ void syscall_trace(struct uml_pt_regs *regs, int entryexit)
 	int is_singlestep = (current->ptrace & PT_DTRACE) && entryexit;
 	int tracesysgood;
 
-	if (unlikely(current->audit_context)) {
-		if (!entryexit)
-			audit_syscall_entry(HOST_AUDIT_ARCH,
-					    UPT_SYSCALL_NR(regs),
-					    UPT_SYSCALL_ARG1(regs),
-					    UPT_SYSCALL_ARG2(regs),
-					    UPT_SYSCALL_ARG3(regs),
-					    UPT_SYSCALL_ARG4(regs));
-		else
-			audit_syscall_exit(regs);
-	}
+	if (!entryexit)
+		audit_syscall_entry(HOST_AUDIT_ARCH,
+				    UPT_SYSCALL_NR(regs),
+				    UPT_SYSCALL_ARG1(regs),
+				    UPT_SYSCALL_ARG2(regs),
+				    UPT_SYSCALL_ARG3(regs),
+				    UPT_SYSCALL_ARG4(regs));
+	else
+		audit_syscall_exit(regs);
 
 	/* Fake a debug trap */
 	if (is_singlestep)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 025f0f01d254..cecfd9a8f734 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -192,7 +192,7 @@ sysexit_from_sys_call:
 	movl %ebx,%edx			/* 3rd arg: 1st syscall arg */
 	movl %eax,%esi			/* 2nd arg: syscall number */
 	movl $AUDIT_ARCH_I386,%edi	/* 1st arg: audit arch */
-	call audit_syscall_entry
+	call __audit_syscall_entry
 	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall number */
 	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index a22facf06f0e..1ccd742eba1b 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -456,7 +456,7 @@ sysenter_audit:
 	movl %ebx,%ecx			/* 3rd arg: 1st syscall arg */
 	movl %eax,%edx			/* 2nd arg: syscall number */
 	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
-	call audit_syscall_entry
+	call __audit_syscall_entry
 	pushl_cfi %ebx
 	movl PT_EAX(%esp),%eax		/* reload syscall number */
 	jmp sysenter_do_call
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e51393dd93a3..1ca66b650123 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -549,7 +549,7 @@ badsys:
 #ifdef CONFIG_AUDITSYSCALL
 	/*
 	 * Fast path for syscall audit without full syscall trace.
-	 * We just call audit_syscall_entry() directly, and then
+	 * We just call __audit_syscall_entry() directly, and then
 	 * jump back to the normal fast path.
 	 */
 auditsys:
@@ -559,7 +559,7 @@ auditsys:
 	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
 	movq %rax,%rsi			/* 2nd arg: syscall number */
 	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
-	call audit_syscall_entry
+	call __audit_syscall_entry
 	LOAD_ARGS 0		/* reload call-clobbered registers */
 	jmp system_call_fastpath
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 8b0218758775..50267386b766 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1392,20 +1392,18 @@ long syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->orig_ax);
 
-	if (unlikely(current->audit_context)) {
-		if (IS_IA32)
-			audit_syscall_entry(AUDIT_ARCH_I386,
-					    regs->orig_ax,
-					    regs->bx, regs->cx,
-					    regs->dx, regs->si);
+	if (IS_IA32)
+		audit_syscall_entry(AUDIT_ARCH_I386,
+				    regs->orig_ax,
+				    regs->bx, regs->cx,
+				    regs->dx, regs->si);
 #ifdef CONFIG_X86_64
-		else
-			audit_syscall_entry(AUDIT_ARCH_X86_64,
-					    regs->orig_ax,
-					    regs->di, regs->si,
-					    regs->dx, regs->r10);
+	else
+		audit_syscall_entry(AUDIT_ARCH_X86_64,
+				    regs->orig_ax,
+				    regs->di, regs->si,
+				    regs->dx, regs->r10);
 #endif
-	}
 
 	return ret ?: regs->orig_ax;
 }
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index a0d042aa2967..2dff698ab02e 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -334,8 +334,7 @@ void do_syscall_trace_enter(struct pt_regs *regs)
 		do_syscall_trace();
 
 #if 0
-	if (unlikely(current->audit_context))
-		audit_syscall_entry(current, AUDIT_ARCH_XTENSA..);
+	audit_syscall_entry(current, AUDIT_ARCH_XTENSA..);
 #endif
 }
 
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 3d65e4b3ba06..f56ce2669b83 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -418,9 +418,9 @@ extern int audit_classify_arch(int arch);
 extern void audit_finish_fork(struct task_struct *child);
 extern int  audit_alloc(struct task_struct *task);
 extern void audit_free(struct task_struct *task);
-extern void audit_syscall_entry(int arch,
-				int major, unsigned long a0, unsigned long a1,
-				unsigned long a2, unsigned long a3);
+extern void __audit_syscall_entry(int arch,
+				  int major, unsigned long a0, unsigned long a1,
+				  unsigned long a2, unsigned long a3);
 extern void __audit_syscall_exit(int ret_success, long ret_value);
 extern void __audit_getname(const char *name);
 extern void audit_putname(const char *name);
@@ -435,6 +435,13 @@ static inline int audit_dummy_context(void)
 	void *p = current->audit_context;
 	return !p || *(int *)p;
 }
+static inline void audit_syscall_entry(int arch, int major, unsigned long a0,
+				       unsigned long a1, unsigned long a2,
+				       unsigned long a3)
+{
+	if (unlikely(!audit_dummy_context()))
+		__audit_syscall_entry(arch, major, a0, a1, a2, a3);
+}
 static inline void audit_syscall_exit(void *pt_regs)
 {
 	if (unlikely(current->audit_context)) {
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3d2853808185..b408100dd6ef 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1632,7 +1632,7 @@ void audit_free(struct task_struct *tsk)
  * will only be written if another part of the kernel requests that it
  * be written).
  */
-void audit_syscall_entry(int arch, int major,
+void __audit_syscall_entry(int arch, int major,
 			 unsigned long a1, unsigned long a2,
 			 unsigned long a3, unsigned long a4)
 {
-- 
cgit v1.2.3


From 07c49417877f8658a6aa0ad9b4e21e4fd4df11b6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: inline checks for not needing to collect aux records

A number of audit hooks make function calls before they determine that
auxilary records do not need to be collected.  Do those checks as static
inlines since the most common case is going to be that records are not
needed and we can skip the function call overhead.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 23 ++++++++++++++++++++---
 kernel/auditsc.c      | 15 +++------------
 2 files changed, 23 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index f56ce2669b83..cf16faff6b8a 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -489,9 +489,9 @@ extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 extern void audit_log_task_context(struct audit_buffer *ab);
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
-extern int audit_bprm(struct linux_binprm *bprm);
-extern void audit_socketcall(int nargs, unsigned long *args);
-extern int audit_sockaddr(int len, void *addr);
+extern int __audit_bprm(struct linux_binprm *bprm);
+extern void __audit_socketcall(int nargs, unsigned long *args);
+extern int __audit_sockaddr(int len, void *addr);
 extern void __audit_fd_pair(int fd1, int fd2);
 extern int audit_set_macxattr(const char *name);
 extern void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr);
@@ -519,6 +519,23 @@ static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid
 	if (unlikely(!audit_dummy_context()))
 		__audit_ipc_set_perm(qbytes, uid, gid, mode);
 }
+static inline int audit_bprm(struct linux_binprm *bprm)
+{
+	if (unlikely(!audit_dummy_context()))
+		return __audit_bprm(bprm);
+	return 0;
+}
+static inline void audit_socketcall(int nargs, unsigned long *args)
+{
+	if (unlikely(!audit_dummy_context()))
+		__audit_socketcall(nargs, args);
+}
+static inline int audit_sockaddr(int len, void *addr)
+{
+	if (unlikely(!audit_dummy_context()))
+		return __audit_sockaddr(len, addr);
+	return 0;
+}
 static inline void audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr)
 {
 	if (unlikely(!audit_dummy_context()))
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index d7382c2aaa9e..e1062f66b01b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2309,14 +2309,11 @@ void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mo
 	context->ipc.has_perm = 1;
 }
 
-int audit_bprm(struct linux_binprm *bprm)
+int __audit_bprm(struct linux_binprm *bprm)
 {
 	struct audit_aux_data_execve *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!audit_enabled || !context || context->dummy))
-		return 0;
-
 	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
 	if (!ax)
 		return -ENOMEM;
@@ -2337,13 +2334,10 @@ int audit_bprm(struct linux_binprm *bprm)
  * @args: args array
  *
  */
-void audit_socketcall(int nargs, unsigned long *args)
+void __audit_socketcall(int nargs, unsigned long *args)
 {
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context || context->dummy))
-		return;
-
 	context->type = AUDIT_SOCKETCALL;
 	context->socketcall.nargs = nargs;
 	memcpy(context->socketcall.args, args, nargs * sizeof(unsigned long));
@@ -2369,13 +2363,10 @@ void __audit_fd_pair(int fd1, int fd2)
  *
  * Returns 0 for success or NULL context or < 0 on error.
  */
-int audit_sockaddr(int len, void *a)
+int __audit_sockaddr(int len, void *a)
 {
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context || context->dummy))
-		return 0;
-
 	if (!context->sockaddr) {
 		void *p = kmalloc(sizeof(struct sockaddr_storage), GFP_KERNEL);
 		if (!p)
-- 
cgit v1.2.3


From 38cdce53daa0408a61fe6d86fe48f31515c9b840 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: drop audit_set_macxattr as it doesn't do anything

unused.  deleted.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index cf16faff6b8a..4f1efe3e8616 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -493,7 +493,6 @@ extern int __audit_bprm(struct linux_binprm *bprm);
 extern void __audit_socketcall(int nargs, unsigned long *args);
 extern int __audit_sockaddr(int len, void *addr);
 extern void __audit_fd_pair(int fd1, int fd2);
-extern int audit_set_macxattr(const char *name);
 extern void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr);
 extern void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec *abs_timeout);
 extern void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification);
@@ -606,7 +605,6 @@ extern int audit_signals;
 #define audit_socketcall(n,a) ((void)0)
 #define audit_fd_pair(n,a) ((void)0)
 #define audit_sockaddr(len, addr) ({ 0; })
-#define audit_set_macxattr(n) do { ; } while (0)
 #define audit_mq_open(o,m,a) ((void)0)
 #define audit_mq_sendrecv(d,l,p,t) ((void)0)
 #define audit_mq_notify(d,n) ((void)0)
-- 
cgit v1.2.3


From a4ff8dba7d8ce5ceb43fb27df66292251cc73bdc Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: inline audit_free to simplify the look of generic code

make the conditional a static inline instead of doing it in generic code.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 7 ++++++-
 kernel/auditsc.c      | 2 +-
 kernel/exit.c         | 3 +--
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 4f1efe3e8616..8eb8bda749b3 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -417,7 +417,7 @@ extern int audit_classify_arch(int arch);
 				/* Public API */
 extern void audit_finish_fork(struct task_struct *child);
 extern int  audit_alloc(struct task_struct *task);
-extern void audit_free(struct task_struct *task);
+extern void __audit_free(struct task_struct *task);
 extern void __audit_syscall_entry(int arch,
 				  int major, unsigned long a0, unsigned long a1,
 				  unsigned long a2, unsigned long a3);
@@ -435,6 +435,11 @@ static inline int audit_dummy_context(void)
 	void *p = current->audit_context;
 	return !p || *(int *)p;
 }
+static inline void audit_free(struct task_struct *task)
+{
+	if (unlikely(task->audit_context))
+		__audit_free(task);
+}
 static inline void audit_syscall_entry(int arch, int major, unsigned long a0,
 				       unsigned long a1, unsigned long a2,
 				       unsigned long a3)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e1062f66b01b..7aaeb38b262a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1594,7 +1594,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
  *
  * Called from copy_process and do_exit
  */
-void audit_free(struct task_struct *tsk)
+void __audit_free(struct task_struct *tsk)
 {
 	struct audit_context *context;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 94ed6e20bb53..88dcbbc446f7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -964,8 +964,7 @@ NORET_TYPE void do_exit(long code)
 	acct_collect(code, group_dead);
 	if (group_dead)
 		tty_audit_exit();
-	if (unlikely(tsk->audit_context))
-		audit_free(tsk);
+	audit_free(tsk);
 
 	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
-- 
cgit v1.2.3


From 6422e78de6880c66a82af512d9bd0c85eb62e661 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: remove audit_finish_fork as it can't be called

Audit entry,always rules are not allowed and are automatically changed in
exit,always rules in userspace.  The kernel refuses to load such rules.

Thus a task in the middle of a syscall (and thus in audit_finish_fork())
can only be in one of two states: AUDIT_BUILD_CONTEXT or AUDIT_DISABLED.
Since the current task cannot be in AUDIT_RECORD_CONTEXT we aren't every
going to actually use the code in audit_finish_fork() since it will
return without doing anything.  Thus drop the code.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  2 --
 kernel/auditsc.c      | 20 --------------------
 kernel/fork.c         |  2 --
 3 files changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 8eb8bda749b3..67b66c37a254 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -415,7 +415,6 @@ extern int audit_classify_arch(int arch);
 #ifdef CONFIG_AUDITSYSCALL
 /* These are defined in auditsc.c */
 				/* Public API */
-extern void audit_finish_fork(struct task_struct *child);
 extern int  audit_alloc(struct task_struct *task);
 extern void __audit_free(struct task_struct *task);
 extern void __audit_syscall_entry(int arch,
@@ -586,7 +585,6 @@ static inline void audit_mmap_fd(int fd, int flags)
 extern int audit_n_rules;
 extern int audit_signals;
 #else /* CONFIG_AUDITSYSCALL */
-#define audit_finish_fork(t)
 #define audit_alloc(t) ({ 0; })
 #define audit_free(t) do { ; } while (0)
 #define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7aaeb38b262a..4d8920f5ab88 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1707,26 +1707,6 @@ void __audit_syscall_entry(int arch, int major,
 	context->ppid       = 0;
 }
 
-void audit_finish_fork(struct task_struct *child)
-{
-	struct audit_context *ctx = current->audit_context;
-	struct audit_context *p = child->audit_context;
-	if (!p || !ctx)
-		return;
-	if (!ctx->in_syscall || ctx->current_state != AUDIT_RECORD_CONTEXT)
-		return;
-	p->arch = ctx->arch;
-	p->major = ctx->major;
-	memcpy(p->argv, ctx->argv, sizeof(ctx->argv));
-	p->ctime = ctx->ctime;
-	p->dummy = ctx->dummy;
-	p->in_syscall = ctx->in_syscall;
-	p->filterkey = kstrdup(ctx->filterkey, GFP_KERNEL);
-	p->ppid = current->pid;
-	p->prio = ctx->prio;
-	p->current_state = ctx->current_state;
-}
-
 /**
  * audit_syscall_exit - deallocate audit context after a system call
  * @pt_regs: syscall registers
diff --git a/kernel/fork.c b/kernel/fork.c
index 443f5125f11e..c1e5c21f48c1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1525,8 +1525,6 @@ long do_fork(unsigned long clone_flags,
 			init_completion(&vfork);
 		}
 
-		audit_finish_fork(p);
-
 		/*
 		 * We set PF_STARTING at creation in case tracing wants to
 		 * use this to distinguish a fully live task from one that
-- 
cgit v1.2.3


From efaffd6e4417860c67576ac760dd6e8bbd15f006 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: allow matching on obj_uid

Allow syscall exit filter matching based on the uid of the owner of an
inode used in a syscall.  aka:

auditctl -a always,exit -S open -F obj_uid=0 -F perm=wa

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  1 +
 kernel/auditfilter.c  |  1 +
 kernel/auditsc.c      | 12 ++++++++++++
 3 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 67b66c37a254..55cb3daaf474 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -223,6 +223,7 @@
 #define AUDIT_PERM	106
 #define AUDIT_DIR	107
 #define AUDIT_FILETYPE	108
+#define AUDIT_OBJ_UID	109
 
 #define AUDIT_ARG0      200
 #define AUDIT_ARG1      (AUDIT_ARG0+1)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 903caa269b5c..13e997423dcd 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -461,6 +461,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_ARG1:
 		case AUDIT_ARG2:
 		case AUDIT_ARG3:
+		case AUDIT_OBJ_UID:
 			break;
 		case AUDIT_ARCH:
 			entry->rule.arch_f = f;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4d8920f5ab88..5cf3ecc01517 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -586,6 +586,18 @@ static int audit_filter_rules(struct task_struct *tsk,
 				}
 			}
 			break;
+		case AUDIT_OBJ_UID:
+			if (name) {
+				result = audit_comparator(name->uid, f->op, f->val);
+			} else if (ctx) {
+				list_for_each_entry(n, &ctx->names_list, list) {
+					if (audit_comparator(n->uid, f->op, f->val)) {
+						++result;
+						break;
+					}
+				}
+			}
+			break;
 		case AUDIT_WATCH:
 			if (name)
 				result = audit_watch_compare(rule->watch, name->ino, name->dev);
-- 
cgit v1.2.3


From 54d3218b31aee5bc9c859ae60fbde933d922448b Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:07 -0500
Subject: audit: allow audit matching on inode gid

Much like the ability to filter audit on the uid of an inode collected, we
should be able to filter on the gid of the inode.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  1 +
 kernel/auditfilter.c  |  1 +
 kernel/auditsc.c      | 12 ++++++++++++
 3 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 55cb3daaf474..e36aa37c88af 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -224,6 +224,7 @@
 #define AUDIT_DIR	107
 #define AUDIT_FILETYPE	108
 #define AUDIT_OBJ_UID	109
+#define AUDIT_OBJ_GID	110
 
 #define AUDIT_ARG0      200
 #define AUDIT_ARG1      (AUDIT_ARG0+1)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 13e997423dcd..f10605c787e6 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -462,6 +462,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_ARG2:
 		case AUDIT_ARG3:
 		case AUDIT_OBJ_UID:
+		case AUDIT_OBJ_GID:
 			break;
 		case AUDIT_ARCH:
 			entry->rule.arch_f = f;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 5cf3ecc01517..87b375fb12ff 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -598,6 +598,18 @@ static int audit_filter_rules(struct task_struct *tsk,
 				}
 			}
 			break;
+		case AUDIT_OBJ_GID:
+			if (name) {
+				result = audit_comparator(name->gid, f->op, f->val);
+			} else if (ctx) {
+				list_for_each_entry(n, &ctx->names_list, list) {
+					if (audit_comparator(n->gid, f->op, f->val)) {
+						++result;
+						break;
+					}
+				}
+			}
+			break;
 		case AUDIT_WATCH:
 			if (name)
 				result = audit_watch_compare(rule->watch, name->ino, name->dev);
-- 
cgit v1.2.3


From 0a300be6d5be8f66cd96609334710c268d0bfdce Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:08 -0500
Subject: audit: remove task argument to audit_set_loginuid

The function always deals with current.  Don't expose an option
pretending one can use it for something.  You can't.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/proc/base.c        | 2 +-
 include/linux/audit.h | 2 +-
 kernel/auditsc.c      | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8173dfd89cb2..e3cbebbabebd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1228,7 +1228,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
 		goto out_free_page;
 
 	}
-	length = audit_set_loginuid(current, loginuid);
+	length = audit_set_loginuid(loginuid);
 	if (likely(length == 0))
 		length = count;
 
diff --git a/include/linux/audit.h b/include/linux/audit.h
index e36aa37c88af..7cbd6fe41573 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -489,7 +489,7 @@ static inline void audit_ptrace(struct task_struct *t)
 extern unsigned int audit_serial(void);
 extern int auditsc_get_stamp(struct audit_context *ctx,
 			      struct timespec *t, unsigned int *serial);
-extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
+extern int  audit_set_loginuid(uid_t loginuid);
 #define audit_get_loginuid(t) ((t)->loginuid)
 #define audit_get_sessionid(t) ((t)->sessionid)
 extern void audit_log_task_context(struct audit_buffer *ab);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 87b375fb12ff..9d6dd7d869c0 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2163,16 +2163,16 @@ int auditsc_get_stamp(struct audit_context *ctx,
 static atomic_t session_id = ATOMIC_INIT(0);
 
 /**
- * audit_set_loginuid - set a task's audit_context loginuid
- * @task: task whose audit context is being modified
+ * audit_set_loginuid - set current task's audit_context loginuid
  * @loginuid: loginuid value
  *
  * Returns 0.
  *
  * Called (set) from fs/proc/base.c::proc_loginuid_write().
  */
-int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
+int audit_set_loginuid(uid_t loginuid)
 {
+	struct task_struct *task = current;
 	unsigned int sessionid = atomic_inc_return(&session_id);
 	struct audit_context *context = task->audit_context;
 
-- 
cgit v1.2.3


From 02d86a568c6d2d335256864451ac8ce781bc5652 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:08 -0500
Subject: audit: allow interfield comparison in audit rules

We wish to be able to audit when a uid=500 task accesses a file which is
uid=0.  Or vice versa.  This patch introduces a new audit filter type
AUDIT_FIELD_COMPARE which takes as an 'enum' which indicates which fields
should be compared.  At this point we only define the task->uid vs
inode->uid, but other comparisons can be added.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h |  4 ++++
 kernel/auditfilter.c  |  5 ++++-
 kernel/auditsc.c      | 30 +++++++++++++++++++++++++++++-
 3 files changed, 37 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 7cbd6fe41573..838e05fc0582 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -182,7 +182,10 @@
  * AUDIT_UNUSED_BITS is updated if need be. */
 #define AUDIT_UNUSED_BITS	0x07FFFC00
 
+/* AUDIT_FIELD_COMPARE rule list */
+#define AUDIT_COMPARE_UID_TO_OBJ_UID	1
 
+#define AUDIT_MAX_FIELD_COMPARE	AUDIT_COMPARE_UID_TO_OBJ_UID
 /* Rule fields */
 				/* These are useful when checking the
 				 * task structure at task creation time
@@ -225,6 +228,7 @@
 #define AUDIT_FILETYPE	108
 #define AUDIT_OBJ_UID	109
 #define AUDIT_OBJ_GID	110
+#define AUDIT_FIELD_COMPARE	111
 
 #define AUDIT_ARG0      200
 #define AUDIT_ARG1      (AUDIT_ARG0+1)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index f10605c787e6..a6c3f1abd206 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -526,7 +526,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 				goto exit_free;
 			break;
 		case AUDIT_FILTERKEY:
-			err = -EINVAL;
 			if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN)
 				goto exit_free;
 			str = audit_unpack_string(&bufp, &remain, f->val);
@@ -543,6 +542,10 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 			if (f->val & ~S_IFMT)
 				goto exit_free;
 			break;
+		case AUDIT_FIELD_COMPARE:
+			if (f->val > AUDIT_MAX_FIELD_COMPARE)
+				goto exit_free;
+			break;
 		default:
 			goto exit_free;
 		}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 9161e70a4379..8fb2c8e6d624 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -463,6 +463,32 @@ static int match_tree_refs(struct audit_context *ctx, struct audit_tree *tree)
 	return 0;
 }
 
+static int audit_field_compare(struct task_struct *tsk,
+			       const struct cred *cred,
+			       struct audit_field *f,
+			       struct audit_context *ctx,
+			       struct audit_names *name)
+{
+	struct audit_names *n;
+
+	switch (f->val) {
+	case AUDIT_COMPARE_UID_TO_OBJ_UID:
+		if (name) {
+			return audit_comparator(cred->uid, f->op, name->uid);
+		} else if (ctx) {
+			list_for_each_entry(n, &ctx->names_list, list) {
+				if (audit_comparator(cred->uid, f->op, n->uid))
+					return 1;
+			}
+		}
+		break;
+	default:
+		WARN(1, "Missing AUDIT_COMPARE define.  Report as a bug\n");
+		return 0;
+	}
+	return 0;
+}
+
 /* Determine if any context name data matches a rule's watch data */
 /* Compare a task_struct with an audit_rule.  Return 1 on match, 0
  * otherwise.
@@ -693,8 +719,10 @@ static int audit_filter_rules(struct task_struct *tsk,
 		case AUDIT_FILETYPE:
 			result = audit_match_filetype(ctx, f->val);
 			break;
+		case AUDIT_FIELD_COMPARE:
+			result = audit_field_compare(tsk, cred, f, ctx, name);
+			break;
 		}
-
 		if (!result)
 			return 0;
 	}
-- 
cgit v1.2.3


From c9fe685f7a17a0ee8bf3fbe51e40b1c8b8e65896 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 3 Jan 2012 14:23:08 -0500
Subject: audit: allow interfield comparison between gid and ogid

Allow audit rules to compare the gid of the running task to the gid of the
inode in question.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 3 ++-
 kernel/auditsc.c      | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 838e05fc0582..fffbc2176ee1 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -184,8 +184,9 @@
 
 /* AUDIT_FIELD_COMPARE rule list */
 #define AUDIT_COMPARE_UID_TO_OBJ_UID	1
+#define AUDIT_COMPARE_GID_TO_OBJ_GID	2
 
-#define AUDIT_MAX_FIELD_COMPARE	AUDIT_COMPARE_UID_TO_OBJ_UID
+#define AUDIT_MAX_FIELD_COMPARE	AUDIT_COMPARE_GID_TO_OBJ_GID
 /* Rule fields */
 				/* These are useful when checking the
 				 * task structure at task creation time
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b12cc32fe377..861c7b9c565a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -474,6 +474,8 @@ static int audit_compare_id(uid_t uid1,
 	uid_t uid2;
 	int rc;
 
+	BUILD_BUG_ON(sizeof(uid_t) != sizeof(gid_t));
+
 	if (name) {
 		addr = (unsigned long)name;
 		addr += name_offset;
@@ -510,6 +512,10 @@ static int audit_field_compare(struct task_struct *tsk,
 		return audit_compare_id(cred->uid,
 					name, offsetof(struct audit_names, uid),
 					f, ctx);
+	case AUDIT_COMPARE_GID_TO_OBJ_GID:
+		return audit_compare_id(cred->gid,
+					name, offsetof(struct audit_names, gid),
+					f, ctx);
 	default:
 		WARN(1, "Missing AUDIT_COMPARE define.  Report as a bug\n");
 		return 0;
-- 
cgit v1.2.3


From 4a6633ed08af5ba67790b4d1adcdeb8ceb55677e Mon Sep 17 00:00:00 2001
From: Peter Moody <pmoody@google.com>
Date: Tue, 13 Dec 2011 16:17:51 -0800
Subject: audit: implement all object interfield comparisons

This completes the matrix of interfield comparisons between uid/gid
information for the current task and the uid/gid information for inodes.
aka I can audit based on differences between the euid of the process and
the uid of fs objects.

Signed-off-by: Peter Moody <pmoody@google.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 10 +++++++++-
 kernel/auditsc.c      | 29 +++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index fffbc2176ee1..67113cb4bc15 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -185,8 +185,16 @@
 /* AUDIT_FIELD_COMPARE rule list */
 #define AUDIT_COMPARE_UID_TO_OBJ_UID	1
 #define AUDIT_COMPARE_GID_TO_OBJ_GID	2
+#define AUDIT_COMPARE_EUID_TO_OBJ_UID	3
+#define AUDIT_COMPARE_EGID_TO_OBJ_GID	4
+#define AUDIT_COMPARE_AUID_TO_OBJ_UID	5
+#define AUDIT_COMPARE_SUID_TO_OBJ_UID	6
+#define AUDIT_COMPARE_SGID_TO_OBJ_GID	7
+#define AUDIT_COMPARE_FSUID_TO_OBJ_UID	8
+#define AUDIT_COMPARE_FSGID_TO_OBJ_GID	9
+
+#define AUDIT_MAX_FIELD_COMPARE		AUDIT_COMPARE_FSGID_TO_OBJ_GID
 
-#define AUDIT_MAX_FIELD_COMPARE	AUDIT_COMPARE_GID_TO_OBJ_GID
 /* Rule fields */
 				/* These are useful when checking the
 				 * task structure at task creation time
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 861c7b9c565a..b8cee462b99e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -508,6 +508,7 @@ static int audit_field_compare(struct task_struct *tsk,
 			       struct audit_names *name)
 {
 	switch (f->val) {
+	/* process to file object comparisons */
 	case AUDIT_COMPARE_UID_TO_OBJ_UID:
 		return audit_compare_id(cred->uid,
 					name, offsetof(struct audit_names, uid),
@@ -516,6 +517,34 @@ static int audit_field_compare(struct task_struct *tsk,
 		return audit_compare_id(cred->gid,
 					name, offsetof(struct audit_names, gid),
 					f, ctx);
+	case AUDIT_COMPARE_EUID_TO_OBJ_UID:
+		return audit_compare_id(cred->euid,
+					name, offsetof(struct audit_names, uid),
+					f, ctx);
+	case AUDIT_COMPARE_EGID_TO_OBJ_GID:
+		return audit_compare_id(cred->egid,
+					name, offsetof(struct audit_names, gid),
+					f, ctx);
+	case AUDIT_COMPARE_AUID_TO_OBJ_UID:
+		return audit_compare_id(tsk->loginuid,
+					name, offsetof(struct audit_names, uid),
+					f, ctx);
+	case AUDIT_COMPARE_SUID_TO_OBJ_UID:
+		return audit_compare_id(cred->suid,
+					name, offsetof(struct audit_names, uid),
+					f, ctx);
+	case AUDIT_COMPARE_SGID_TO_OBJ_GID:
+		return audit_compare_id(cred->sgid,
+					name, offsetof(struct audit_names, gid),
+					f, ctx);
+	case AUDIT_COMPARE_FSUID_TO_OBJ_UID:
+		return audit_compare_id(cred->fsuid,
+					name, offsetof(struct audit_names, uid),
+					f, ctx);
+	case AUDIT_COMPARE_FSGID_TO_OBJ_GID:
+		return audit_compare_id(cred->fsgid,
+					name, offsetof(struct audit_names, gid),
+					f, ctx);
 	default:
 		WARN(1, "Missing AUDIT_COMPARE define.  Report as a bug\n");
 		return 0;
-- 
cgit v1.2.3


From 10d68360871657204885371cdf2594412675d2f9 Mon Sep 17 00:00:00 2001
From: Peter Moody <pmoody@google.com>
Date: Wed, 4 Jan 2012 15:24:31 -0500
Subject: audit: comparison on interprocess fields

This allows audit to specify rules in which we compare two fields of a
process.  Such as is the running process uid != to the running process
euid?

Signed-off-by: Peter Moody <pmoody@google.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h | 24 +++++++++++++++++++++++-
 kernel/auditsc.c      | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 67113cb4bc15..9ff7a2c48b50 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -193,7 +193,29 @@
 #define AUDIT_COMPARE_FSUID_TO_OBJ_UID	8
 #define AUDIT_COMPARE_FSGID_TO_OBJ_GID	9
 
-#define AUDIT_MAX_FIELD_COMPARE		AUDIT_COMPARE_FSGID_TO_OBJ_GID
+#define AUDIT_COMPARE_UID_TO_AUID	10
+#define AUDIT_COMPARE_UID_TO_EUID	11
+#define AUDIT_COMPARE_UID_TO_FSUID	12
+#define AUDIT_COMPARE_UID_TO_SUID	13
+
+#define AUDIT_COMPARE_AUID_TO_FSUID	14
+#define AUDIT_COMPARE_AUID_TO_SUID	15
+#define AUDIT_COMPARE_AUID_TO_EUID	16
+
+#define AUDIT_COMPARE_EUID_TO_SUID	17
+#define AUDIT_COMPARE_EUID_TO_FSUID	18
+
+#define AUDIT_COMPARE_SUID_TO_FSUID	19
+
+#define AUDIT_COMPARE_GID_TO_EGID	20
+#define AUDIT_COMPARE_GID_TO_FSGID	21
+#define AUDIT_COMPARE_GID_TO_SGID	22
+
+#define AUDIT_COMPARE_EGID_TO_FSGID	23
+#define AUDIT_COMPARE_EGID_TO_SGID	24
+#define AUDIT_COMPARE_SGID_TO_FSGID	25
+
+#define AUDIT_MAX_FIELD_COMPARE		AUDIT_COMPARE_SGID_TO_FSGID
 
 /* Rule fields */
 				/* These are useful when checking the
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b8cee462b99e..593237e3654d 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -545,6 +545,45 @@ static int audit_field_compare(struct task_struct *tsk,
 		return audit_compare_id(cred->fsgid,
 					name, offsetof(struct audit_names, gid),
 					f, ctx);
+	/* uid comparisons */
+	case AUDIT_COMPARE_UID_TO_AUID:
+		return audit_comparator(cred->uid, f->op, tsk->loginuid);
+	case AUDIT_COMPARE_UID_TO_EUID:
+		return audit_comparator(cred->uid, f->op, cred->euid);
+	case AUDIT_COMPARE_UID_TO_SUID:
+		return audit_comparator(cred->uid, f->op, cred->suid);
+	case AUDIT_COMPARE_UID_TO_FSUID:
+		return audit_comparator(cred->uid, f->op, cred->fsuid);
+	/* auid comparisons */
+	case AUDIT_COMPARE_AUID_TO_EUID:
+		return audit_comparator(tsk->loginuid, f->op, cred->euid);
+	case AUDIT_COMPARE_AUID_TO_SUID:
+		return audit_comparator(tsk->loginuid, f->op, cred->suid);
+	case AUDIT_COMPARE_AUID_TO_FSUID:
+		return audit_comparator(tsk->loginuid, f->op, cred->fsuid);
+	/* euid comparisons */
+	case AUDIT_COMPARE_EUID_TO_SUID:
+		return audit_comparator(cred->euid, f->op, cred->suid);
+	case AUDIT_COMPARE_EUID_TO_FSUID:
+		return audit_comparator(cred->euid, f->op, cred->fsuid);
+	/* suid comparisons */
+	case AUDIT_COMPARE_SUID_TO_FSUID:
+		return audit_comparator(cred->suid, f->op, cred->fsuid);
+	/* gid comparisons */
+	case AUDIT_COMPARE_GID_TO_EGID:
+		return audit_comparator(cred->gid, f->op, cred->egid);
+	case AUDIT_COMPARE_GID_TO_SGID:
+		return audit_comparator(cred->gid, f->op, cred->sgid);
+	case AUDIT_COMPARE_GID_TO_FSGID:
+		return audit_comparator(cred->gid, f->op, cred->fsgid);
+	/* egid comparisons */
+	case AUDIT_COMPARE_EGID_TO_SGID:
+		return audit_comparator(cred->egid, f->op, cred->sgid);
+	case AUDIT_COMPARE_EGID_TO_FSGID:
+		return audit_comparator(cred->egid, f->op, cred->fsgid);
+	/* sgid comparison */
+	case AUDIT_COMPARE_SGID_TO_FSGID:
+		return audit_comparator(cred->sgid, f->op, cred->fsgid);
 	default:
 		WARN(1, "Missing AUDIT_COMPARE define.  Report as a bug\n");
 		return 0;
-- 
cgit v1.2.3


From 67175b855bfd6ed95ffeff95532173c07de6432d Mon Sep 17 00:00:00 2001
From: James Bottomley <jbottomley@parallels.com>
Date: Tue, 17 Jan 2012 21:14:05 +0000
Subject: Fix compile breakage with kref.h

This set of build failures just started appearing on parisc:

  In file included from drivers/input/serio/serio_raw.c:12:
  include/linux/kref.h: In function 'kref_get':
  include/linux/kref.h:40: error: 'TAINT_WARN' undeclared (first use in this function)
  include/linux/kref.h:40: error: (Each undeclared identifier is reported only once
  include/linux/kref.h:40: error: for each function it appears in.)
  include/linux/kref.h: In function 'kref_sub':
  include/linux/kref.h:65: error: 'TAINT_WARN' undeclared (first use in this function)

It happens because TAINT_WARN is defined in kernel.h and this particular
compile doesn't seem to include it (no idea why it's just manifesting ..
probably some #include file untangling exposed it).

Fix by adding

  #include <linux/kernel.h>

to linux/kref.h

Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kref.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index abc0120b09b7..9c07dcebded7 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -17,6 +17,7 @@
 
 #include <linux/bug.h>
 #include <linux/atomic.h>
+#include <linux/kernel.h>
 
 struct kref {
 	atomic_t refcount;
-- 
cgit v1.2.3


From ee0b31a25a010116f44fca6c96f4516d417793dd Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Tue, 17 Jan 2012 20:39:51 +0000
Subject: keys: fix trusted/encrypted keys sparse rcu_assign_pointer messages

Define rcu_assign_keypointer(), which uses the key payload.rcudata instead
of payload.data, to resolve the CONFIG_SPARSE_RCU_POINTER message:
"incompatible types in comparison expression (different address spaces)"

Replace the rcu_assign_pointer() calls in encrypted/trusted keys with
rcu_assign_keypointer().

Signed-off-by: Mimi Zohar <zohar@us.ibm.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/key.h                              | 3 +++
 security/keys/encrypted-keys/encrypted.c         | 4 ++--
 security/keys/encrypted-keys/masterkey_trusted.c | 2 ++
 security/keys/trusted.c                          | 4 ++--
 4 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index 183a6af7715d..bfc014c57351 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -293,6 +293,9 @@ static inline bool key_is_instantiated(const struct key *key)
 	(rcu_dereference_protected((KEY)->payload.rcudata,		\
 				   rwsem_is_locked(&((struct key *)(KEY))->sem)))
 
+#define rcu_assign_keypointer(KEY, PAYLOAD)				\
+	(rcu_assign_pointer((KEY)->payload.rcudata, PAYLOAD))
+
 #ifdef CONFIG_SYSCTL
 extern ctl_table key_sysctls[];
 #endif
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 41144f71d615..d91efb6901e9 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -810,7 +810,7 @@ static int encrypted_instantiate(struct key *key, const void *data,
 		goto out;
 	}
 
-	rcu_assign_pointer(key->payload.data, epayload);
+	rcu_assign_keypointer(key, epayload);
 out:
 	kfree(datablob);
 	return ret;
@@ -874,7 +874,7 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen)
 	memcpy(new_epayload->payload_data, epayload->payload_data,
 	       epayload->payload_datalen);
 
-	rcu_assign_pointer(key->payload.data, new_epayload);
+	rcu_assign_keypointer(key, new_epayload);
 	call_rcu(&epayload->rcu, encrypted_rcu_free);
 out:
 	kfree(buf);
diff --git a/security/keys/encrypted-keys/masterkey_trusted.c b/security/keys/encrypted-keys/masterkey_trusted.c
index df87272e3f51..8c16c3e472e7 100644
--- a/security/keys/encrypted-keys/masterkey_trusted.c
+++ b/security/keys/encrypted-keys/masterkey_trusted.c
@@ -18,6 +18,8 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <keys/trusted-type.h>
+#include <keys/encrypted-type.h>
+#include "encrypted.h"
 
 /*
  * request_trusted_key - request the trusted key
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 0ed5fdf238a2..2d5d041f2049 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -993,7 +993,7 @@ out:
 	kfree(datablob);
 	kfree(options);
 	if (!ret)
-		rcu_assign_pointer(key->payload.data, payload);
+		rcu_assign_keypointer(key, payload);
 	else
 		kfree(payload);
 	return ret;
@@ -1067,7 +1067,7 @@ static int trusted_update(struct key *key, const void *data, size_t datalen)
 			goto out;
 		}
 	}
-	rcu_assign_pointer(key->payload.data, new_p);
+	rcu_assign_keypointer(key, new_p);
 	call_rcu(&p->rcu, trusted_rcu_free);
 out:
 	kfree(datablob);
-- 
cgit v1.2.3


From 5e8898e97a5db4125d944070922164d1d09a2689 Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Date: Tue, 17 Jan 2012 17:12:03 +0200
Subject: lib: digital signature config option name change

It was reported that DIGSIG is confusing name for digital signature
module. It was suggested to rename DIGSIG to SIGNATURE.

Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
Suggested-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/digsig.h     | 4 ++--
 lib/Kconfig                | 2 +-
 lib/Makefile               | 2 +-
 security/integrity/Kconfig | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/digsig.h b/include/linux/digsig.h
index efae755017d7..b01558b15814 100644
--- a/include/linux/digsig.h
+++ b/include/linux/digsig.h
@@ -46,7 +46,7 @@ struct signature_hdr {
 	char		mpi[0];
 } __packed;
 
-#if defined(CONFIG_DIGSIG) || defined(CONFIG_DIGSIG_MODULE)
+#if defined(CONFIG_SIGNATURE) || defined(CONFIG_SIGNATURE_MODULE)
 
 int digsig_verify(struct key *keyring, const char *sig, int siglen,
 					const char *digest, int digestlen);
@@ -59,6 +59,6 @@ static inline int digsig_verify(struct key *keyring, const char *sig,
 	return -EOPNOTSUPP;
 }
 
-#endif /* CONFIG_DIGSIG */
+#endif /* CONFIG_SIGNATURE */
 
 #endif /* _DIGSIG_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 201e1b33d721..854735d96dc3 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -302,7 +302,7 @@ config MPILIB_EXTRA
 	  This code in unnecessary for RSA digital signature verification,
 	  and can be compiled if needed.
 
-config DIGSIG
+config SIGNATURE
 	tristate "In-kernel signature checker"
 	depends on KEYS
 	select MPILIB
diff --git a/lib/Makefile b/lib/Makefile
index dace162c7e1c..d71aae1b01b3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -119,7 +119,7 @@ obj-$(CONFIG_CORDIC) += cordic.o
 obj-$(CONFIG_DQL) += dynamic_queue_limits.o
 
 obj-$(CONFIG_MPILIB) += mpi/
-obj-$(CONFIG_DIGSIG) += digsig.o
+obj-$(CONFIG_SIGNATURE) += digsig.o
 
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
diff --git a/security/integrity/Kconfig b/security/integrity/Kconfig
index d384ea921482..ff60bf72881f 100644
--- a/security/integrity/Kconfig
+++ b/security/integrity/Kconfig
@@ -7,7 +7,7 @@ config INTEGRITY_DIGSIG
 	boolean "Digital signature verification using multiple keyrings"
 	depends on INTEGRITY && KEYS
 	default n
-	select DIGSIG
+	select SIGNATURE
 	help
 	  This option enables digital signature verification support
 	  using multiple keyrings. It defines separate keyrings for each
-- 
cgit v1.2.3


From 65b7f839ceecc0a36c7969c0c9151d5748cd4242 Mon Sep 17 00:00:00 2001
From: Thomas Renninger <trenn@suse.de>
Date: Tue, 17 Jan 2012 22:40:08 +0100
Subject: intel_idle: Split up and provide per CPU initialization func

Function split up, should have no functional change.

Provides entry point for physically hotplugged CPUs
to initialize and activate cpuidle.

Signed-off-by: Thomas Renninger <trenn@suse.de>
CC: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
CC: Shaohua Li <shaohua.li@intel.com>
CC: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 drivers/idle/intel_idle.c | 82 ++++++++++++++++++++++++-----------------------
 include/linux/cpuidle.h   |  7 ++++
 2 files changed, 49 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 5d2f8e13cf0e..ef0c04d8dc22 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -478,64 +478,60 @@ static int intel_idle_cpuidle_driver_init(void)
 
 
 /*
- * intel_idle_cpuidle_devices_init()
+ * intel_idle_cpu_init()
  * allocate, initialize, register cpuidle_devices
+ * @cpu: cpu/core to initialize
  */
-static int intel_idle_cpuidle_devices_init(void)
+int intel_idle_cpu_init(int cpu)
 {
-	int i, cstate;
+	int cstate;
 	struct cpuidle_device *dev;
 
-	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
-	if (intel_idle_cpuidle_devices == NULL)
-		return -ENOMEM;
-
-	for_each_online_cpu(i) {
-		dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
+	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
 
-		dev->state_count = 1;
+	dev->state_count = 1;
 
-		for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
-			int num_substates;
+	for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
+		int num_substates;
 
-			if (cstate > max_cstate) {
-				printk(PREFIX "max_cstate %d reached\n",
-					max_cstate);
-				break;
-			}
+		if (cstate > max_cstate) {
+			printk(PREFIX "max_cstate %d reached\n",
+			       max_cstate);
+			break;
+		}
 
-			/* does the state exist in CPUID.MWAIT? */
-			num_substates = (mwait_substates >> ((cstate) * 4))
-						& MWAIT_SUBSTATE_MASK;
-			if (num_substates == 0)
-				continue;
-			/* is the state not enabled? */
-			if (cpuidle_state_table[cstate].enter == NULL) {
-				continue;
-			}
+		/* does the state exist in CPUID.MWAIT? */
+		num_substates = (mwait_substates >> ((cstate) * 4))
+			& MWAIT_SUBSTATE_MASK;
+		if (num_substates == 0)
+			continue;
+		/* is the state not enabled? */
+		if (cpuidle_state_table[cstate].enter == NULL)
+			continue;
 
-			dev->states_usage[dev->state_count].driver_data =
-				(void *)get_driver_data(cstate);
+		dev->states_usage[dev->state_count].driver_data =
+			(void *)get_driver_data(cstate);
 
 			dev->state_count += 1;
 		}
+	dev->cpu = cpu;
 
-		dev->cpu = i;
-		if (cpuidle_register_device(dev)) {
-			pr_debug(PREFIX "cpuidle_register_device %d failed!\n",
-				 i);
-			intel_idle_cpuidle_devices_uninit();
-			return -EIO;
-		}
+	if (cpuidle_register_device(dev)) {
+		pr_debug(PREFIX "cpuidle_register_device %d failed!\n", cpu);
+		intel_idle_cpuidle_devices_uninit();
+		return -EIO;
 	}
 
+	if (auto_demotion_disable_flags)
+		smp_call_function_single(cpu, auto_demotion_disable, NULL, 1);
+
 	return 0;
 }
 
 
 static int __init intel_idle_init(void)
 {
-	int retval;
+	int retval, i;
 
 	/* Do not load intel_idle at all for now if idle= is passed */
 	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
@@ -553,10 +549,16 @@ static int __init intel_idle_init(void)
 		return retval;
 	}
 
-	retval = intel_idle_cpuidle_devices_init();
-	if (retval) {
-		cpuidle_unregister_driver(&intel_idle_driver);
-		return retval;
+	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
+	if (intel_idle_cpuidle_devices == NULL)
+		return -ENOMEM;
+
+	for_each_online_cpu(i) {
+		retval = intel_idle_cpu_init(i);
+		if (retval) {
+			cpuidle_unregister_driver(&intel_idle_driver);
+			return retval;
+		}
 	}
 
 	return 0;
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 7408af843b8a..93df66ea794a 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -188,7 +188,14 @@ struct cpuidle_governor {
 extern int cpuidle_register_governor(struct cpuidle_governor *gov);
 extern void cpuidle_unregister_governor(struct cpuidle_governor *gov);
 
+#ifdef CONFIG_INTEL_IDLE
+extern int intel_idle_cpu_init(int cpu);
 #else
+static inline int intel_idle_cpu_init(int cpu) { return -1; }
+#endif
+
+#else
+static inline int intel_idle_cpu_init(int cpu) { return -1; }
 
 static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
 {return 0;}
-- 
cgit v1.2.3


From 456a8167e94b66f406c27400a46a707b870452b0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 18 Jan 2012 10:04:29 +0000
Subject: KEYS: Permit key_serial() to be called with a const key pointer

Permit key_serial() to be called with a const key pointer.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/key.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index bfc014c57351..5253471cd2ea 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -271,7 +271,7 @@ extern int keyring_add_key(struct key *keyring,
 
 extern struct key *key_lookup(key_serial_t id);
 
-static inline key_serial_t key_serial(struct key *key)
+static inline key_serial_t key_serial(const struct key *key)
 {
 	return key ? key->serial : 0;
 }
-- 
cgit v1.2.3


From 72081624d5ad3cf56deb6e727b78c4e7a55e4eec Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 19 Jan 2012 23:25:33 +0100
Subject: PM / Hibernate: Rewrite unlock_system_sleep() to fix s2disk
 regression

Commit 33e638b, "PM / Sleep: Use the freezer_count() functions in
[un]lock_system_sleep() APIs" introduced an undesirable change in the
behaviour of unlock_system_sleep() since freezer_count() internally calls
try_to_freeze() - which we don't need in unlock_system_sleep().

And commit bcda53f, "PM / Sleep: Replace mutex_[un]lock(&pm_mutex) with
[un]lock_system_sleep()" made these APIs wide-spread. This caused a
regression in suspend-to-disk where snapshot_read() and snapshot_write()
were getting frozen due to the try_to_freeze embedded in
unlock_system_sleep(), since these functions were invoked when the freezing
condition was still in effect.

Fix this by rewriting unlock_system_sleep() by open-coding freezer_count()
and dropping the try_to_freeze() part. Not only will this fix the
regression but this will also ensure that the API only does what it is
intended to do, and nothing more, under the hood.

While at it, make the code more correct and robust by ensuring that the
PF_FREEZER_SKIP flag gets cleared with pm_mutex held, to avoid a race with
the freezer.

Also, to be on the safer side, open-code freezer_do_not_count() as well
(inside lock_system_sleep()), to ensure that any unrelated modification to
freezer[_do_not]_count() does not break things again!

Reported-and-tested-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/suspend.h | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 95040cc33107..91784a4f8608 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -357,14 +357,29 @@ extern bool pm_save_wakeup_count(unsigned int count);
 
 static inline void lock_system_sleep(void)
 {
-	freezer_do_not_count();
+	current->flags |= PF_FREEZER_SKIP;
 	mutex_lock(&pm_mutex);
 }
 
 static inline void unlock_system_sleep(void)
 {
+	/*
+	 * Don't use freezer_count() because we don't want the call to
+	 * try_to_freeze() here.
+	 *
+	 * Reason:
+	 * Fundamentally, we just don't need it, because freezing condition
+	 * doesn't come into effect until we release the pm_mutex lock,
+	 * since the freezer always works with pm_mutex held.
+	 *
+	 * More importantly, in the case of hibernation,
+	 * unlock_system_sleep() gets called in snapshot_read() and
+	 * snapshot_write() when the freezing condition is still in effect.
+	 * Which means, if we use try_to_freeze() here, it would make them
+	 * enter the refrigerator, thus causing hibernation to lockup.
+	 */
+	current->flags &= ~PF_FREEZER_SKIP;
 	mutex_unlock(&pm_mutex);
-	freezer_count();
 }
 
 #else /* !CONFIG_PM_SLEEP */
-- 
cgit v1.2.3


From 65f2e753f1eb09d3a7e2a0d16408a5433b4097b2 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 20 Jan 2012 17:38:58 +0000
Subject: Revert "ARM: sa11x0: Implement autoloading of codec and codec pdata
 for mcp bus."

This reverts commit 5dd7bf59e0e8563265b3e5b33276099ef628fcc7.

Conflicts:

	scripts/mod/file2alias.c

This change is wrong on many levels.  First and foremost, it causes a
regression.  On boot on Assabet, which this patch gives a codec id of
'ucb1x00', it gives:

	ucb1x00 ID not found: 1005

0x1005 is a valid ID for the UCB1300 device.

Secondly, this patch is way over the top in terms of complexity.  The
only device which has been seen to be connected with this MCP code is
the UCB1x00 (UCB1200, UCB1300 etc) devices, and they all use the same
driver.  Adding a match table, requiring the codec string to match the
hardware ID read out of the ID register, etc is completely over the top
when we can just read the hardware ID register.
---
 arch/arm/mach-sa1100/assabet.c          |  1 -
 arch/arm/mach-sa1100/cerf.c             |  1 -
 arch/arm/mach-sa1100/collie.c           |  8 +-----
 arch/arm/mach-sa1100/include/mach/mcp.h |  2 --
 arch/arm/mach-sa1100/lart.c             |  1 -
 arch/arm/mach-sa1100/shannon.c          |  1 -
 arch/arm/mach-sa1100/simpad.c           |  8 +-----
 drivers/mfd/mcp-core.c                  | 44 ++----------------------------
 drivers/mfd/mcp-sa11x0.c                |  7 ++---
 drivers/mfd/ucb1x00-core.c              | 48 ++++++++-------------------------
 drivers/mfd/ucb1x00-ts.c                |  2 +-
 include/linux/mfd/mcp.h                 |  7 ++---
 include/linux/mfd/ucb1x00.h             |  5 +---
 include/linux/mod_devicetable.h         | 11 --------
 scripts/mod/file2alias.c                | 10 -------
 15 files changed, 21 insertions(+), 135 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index d8aa1c28353b..0c4b76ab4d8e 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -202,7 +202,6 @@ static struct irda_platform_data assabet_irda_data = {
 static struct mcp_plat_data assabet_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1x00",
 };
 
 static void __init assabet_init(void)
diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c
index fcadc4cafe9a..11bb6d0b9be3 100644
--- a/arch/arm/mach-sa1100/cerf.c
+++ b/arch/arm/mach-sa1100/cerf.c
@@ -124,7 +124,6 @@ static void __init cerf_map_io(void)
 static struct mcp_plat_data cerf_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1x00",
 };
 
 static void __init cerf_init(void)
diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index 6b7c74b304cf..b9060e236def 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c
@@ -27,7 +27,6 @@
 #include <linux/timer.h>
 #include <linux/gpio.h>
 #include <linux/pda_power.h>
-#include <linux/mfd/ucb1x00.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -86,15 +85,10 @@ static struct scoop_pcmcia_config collie_pcmcia_config = {
 	.num_devs	= 1,
 };
 
-static struct ucb1x00_plat_data collie_ucb1x00_data = {
-	.gpio_base	= COLLIE_TC35143_GPIO_BASE,
-};
-
 static struct mcp_plat_data collie_mcp_data = {
 	.mccr0		= MCCR0_ADM | MCCR0_ExtClk,
 	.sclk_rate	= 9216000,
-	.codec		= "ucb1x00",
-	.codec_pdata	= &collie_ucb1x00_data,
+	.gpio_base	= COLLIE_TC35143_GPIO_BASE,
 };
 
 /*
diff --git a/arch/arm/mach-sa1100/include/mach/mcp.h b/arch/arm/mach-sa1100/include/mach/mcp.h
index 586cec898b35..ed1a331508a7 100644
--- a/arch/arm/mach-sa1100/include/mach/mcp.h
+++ b/arch/arm/mach-sa1100/include/mach/mcp.h
@@ -17,8 +17,6 @@ struct mcp_plat_data {
 	u32 mccr1;
 	unsigned int sclk_rate;
 	int gpio_base;
-	const char *codec;
-	void *codec_pdata;
 };
 
 #endif
diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c
index 48a8f4ef0fcd..af4e2761f3db 100644
--- a/arch/arm/mach-sa1100/lart.c
+++ b/arch/arm/mach-sa1100/lart.c
@@ -24,7 +24,6 @@
 static struct mcp_plat_data lart_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1x00",
 };
 
 static void __init lart_init(void)
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index 3807c9135272..318b2b766a0b 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -55,7 +55,6 @@ static struct resource shannon_flash_resource = {
 static struct mcp_plat_data shannon_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1x00",
 };
 
 static void __init shannon_init(void)
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index d9b765c441f6..e17c04d6e324 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -14,7 +14,6 @@
 #include <linux/mtd/partitions.h>
 #include <linux/io.h>
 #include <linux/gpio.h>
-#include <linux/mfd/ucb1x00.h>
 
 #include <asm/irq.h>
 #include <mach/hardware.h>
@@ -188,15 +187,10 @@ static struct resource simpad_flash_resources [] = {
 	}
 };
 
-static struct ucb1x00_plat_data simpad_ucb1x00_data = {
-	.gpio_base	= SIMPAD_UCB1X00_GPIO_BASE,
-};
-
 static struct mcp_plat_data simpad_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.codec		= "ucb1300",
-	.codec_pdata	= &simpad_ucb1x00_data,
+	.gpio_base	= SIMPAD_UCB1X00_GPIO_BASE,
 };
 
 
diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c
index 63be60bc3455..84815f9ef636 100644
--- a/drivers/mfd/mcp-core.c
+++ b/drivers/mfd/mcp-core.c
@@ -26,35 +26,9 @@
 #define to_mcp(d)		container_of(d, struct mcp, attached_device)
 #define to_mcp_driver(d)	container_of(d, struct mcp_driver, drv)
 
-static const struct mcp_device_id *mcp_match_id(const struct mcp_device_id *id,
-						const char *codec)
-{
-	while (id->name[0]) {
-		if (strcmp(codec, id->name) == 0)
-			return id;
-		id++;
-	}
-	return NULL;
-}
-
-const struct mcp_device_id *mcp_get_device_id(const struct mcp *mcp)
-{
-	const struct mcp_driver *driver =
-		to_mcp_driver(mcp->attached_device.driver);
-
-	return mcp_match_id(driver->id_table, mcp->codec);
-}
-EXPORT_SYMBOL(mcp_get_device_id);
-
 static int mcp_bus_match(struct device *dev, struct device_driver *drv)
 {
-	const struct mcp *mcp = to_mcp(dev);
-	const struct mcp_driver *driver = to_mcp_driver(drv);
-
-	if (driver->id_table)
-		return !!mcp_match_id(driver->id_table, mcp->codec);
-
-	return 0;
+	return 1;
 }
 
 static int mcp_bus_probe(struct device *dev)
@@ -100,18 +74,9 @@ static int mcp_bus_resume(struct device *dev)
 	return ret;
 }
 
-static int mcp_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-	struct mcp *mcp = to_mcp(dev);
-
-	add_uevent_var(env, "MODALIAS=%s%s", MCP_MODULE_PREFIX, mcp->codec);
-	return 0;
-}
-
 static struct bus_type mcp_bus_type = {
 	.name		= "mcp",
 	.match		= mcp_bus_match,
-	.uevent		= mcp_bus_uevent,
 	.probe		= mcp_bus_probe,
 	.remove		= mcp_bus_remove,
 	.suspend	= mcp_bus_suspend,
@@ -247,14 +212,9 @@ struct mcp *mcp_host_alloc(struct device *parent, size_t size)
 }
 EXPORT_SYMBOL(mcp_host_alloc);
 
-int mcp_host_register(struct mcp *mcp, void *pdata)
+int mcp_host_register(struct mcp *mcp)
 {
-	if (!mcp->codec)
-		return -EINVAL;
-
-	mcp->attached_device.platform_data = pdata;
 	dev_set_name(&mcp->attached_device, "mcp0");
-	request_module("%s%s", MCP_MODULE_PREFIX, mcp->codec);
 	return device_register(&mcp->attached_device);
 }
 EXPORT_SYMBOL(mcp_host_register);
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index da4e077a1bee..02c53a0766c4 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -146,9 +146,6 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	if (!data)
 		return -ENODEV;
 
-	if (!data->codec)
-		return -ENODEV;
-
 	if (!request_mem_region(0x80060000, 0x60, "sa11x0-mcp"))
 		return -EBUSY;
 
@@ -165,7 +162,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	mcp->dma_audio_wr	= DMA_Ser4MCP0Wr;
 	mcp->dma_telco_rd	= DMA_Ser4MCP1Rd;
 	mcp->dma_telco_wr	= DMA_Ser4MCP1Wr;
-	mcp->codec		= data->codec;
+	mcp->gpio_base		= data->gpio_base;
 
 	platform_set_drvdata(pdev, mcp);
 
@@ -198,7 +195,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev)
 	mcp->rw_timeout = (64 * 3 * 1000000 + mcp->sclk_rate - 1) /
 			  mcp->sclk_rate;
 
-	ret = mcp_host_register(mcp, data->codec_pdata);
+	ret = mcp_host_register(mcp);
 	if (ret == 0)
 		goto out;
 
diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c
index 91c4f25e0e55..b281217334eb 100644
--- a/drivers/mfd/ucb1x00-core.c
+++ b/drivers/mfd/ucb1x00-core.c
@@ -36,15 +36,6 @@ static DEFINE_MUTEX(ucb1x00_mutex);
 static LIST_HEAD(ucb1x00_drivers);
 static LIST_HEAD(ucb1x00_devices);
 
-static struct mcp_device_id ucb1x00_id[] = {
-	{ "ucb1x00", 0 },  /* auto-detection */
-	{ "ucb1200", UCB_ID_1200 },
-	{ "ucb1300", UCB_ID_1300 },
-	{ "tc35143", UCB_ID_TC35143 },
-	{ }
-};
-MODULE_DEVICE_TABLE(mcp, ucb1x00_id);
-
 /**
  *	ucb1x00_io_set_dir - set IO direction
  *	@ucb: UCB1x00 structure describing chip
@@ -536,33 +527,17 @@ static struct class ucb1x00_class = {
 
 static int ucb1x00_probe(struct mcp *mcp)
 {
-	const struct mcp_device_id *mid;
 	struct ucb1x00 *ucb;
 	struct ucb1x00_driver *drv;
-	struct ucb1x00_plat_data *pdata;
 	unsigned int id;
 	int ret = -ENODEV;
 	int temp;
 
 	mcp_enable(mcp);
 	id = mcp_reg_read(mcp, UCB_ID);
-	mid = mcp_get_device_id(mcp);
 
-	if (mid && mid->driver_data) {
-		if (id != mid->driver_data) {
-			printk(KERN_WARNING "%s wrong ID %04x found: %04x\n",
-				mid->name, (unsigned int) mid->driver_data, id);
-			goto err_disable;
-		}
-	} else {
-		mid = &ucb1x00_id[1];
-		while (mid->driver_data) {
-			if (id == mid->driver_data)
-				break;
-			mid++;
-		}
-		printk(KERN_WARNING "%s ID not found: %04x\n",
-			ucb1x00_id[0].name, id);
+	if (id != UCB_ID_1200 && id != UCB_ID_1300 && id != UCB_ID_TC35143) {
+		printk(KERN_WARNING "UCB1x00 ID not found: %04x\n", id);
 		goto err_disable;
 	}
 
@@ -571,28 +546,28 @@ static int ucb1x00_probe(struct mcp *mcp)
 	if (!ucb)
 		goto err_disable;
 
-	pdata = mcp->attached_device.platform_data;
+
 	ucb->dev.class = &ucb1x00_class;
 	ucb->dev.parent = &mcp->attached_device;
-	dev_set_name(&ucb->dev, mid->name);
+	dev_set_name(&ucb->dev, "ucb1x00");
 
 	spin_lock_init(&ucb->lock);
 	spin_lock_init(&ucb->io_lock);
 	sema_init(&ucb->adc_sem, 1);
 
-	ucb->id  = mid;
+	ucb->id  = id;
 	ucb->mcp = mcp;
 	ucb->irq = ucb1x00_detect_irq(ucb);
 	if (ucb->irq == NO_IRQ) {
-		printk(KERN_ERR "%s: IRQ probe failed\n", mid->name);
+		printk(KERN_ERR "UCB1x00: IRQ probe failed\n");
 		ret = -ENODEV;
 		goto err_free;
 	}
 
 	ucb->gpio.base = -1;
-	if (pdata && (pdata->gpio_base >= 0)) {
+	if (mcp->gpio_base != 0) {
 		ucb->gpio.label = dev_name(&ucb->dev);
-		ucb->gpio.base = pdata->gpio_base;
+		ucb->gpio.base = mcp->gpio_base;
 		ucb->gpio.ngpio = 10;
 		ucb->gpio.set = ucb1x00_gpio_set;
 		ucb->gpio.get = ucb1x00_gpio_get;
@@ -605,10 +580,10 @@ static int ucb1x00_probe(struct mcp *mcp)
 		dev_info(&ucb->dev, "gpio_base not set so no gpiolib support");
 
 	ret = request_irq(ucb->irq, ucb1x00_irq, IRQF_TRIGGER_RISING,
-			  mid->name, ucb);
+			  "UCB1x00", ucb);
 	if (ret) {
-		printk(KERN_ERR "%s: unable to grab irq%d: %d\n",
-			mid->name, ucb->irq, ret);
+		printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n",
+			ucb->irq, ret);
 		goto err_gpio;
 	}
 
@@ -730,7 +705,6 @@ static struct mcp_driver ucb1x00_driver = {
 	.remove		= ucb1x00_remove,
 	.suspend	= ucb1x00_suspend,
 	.resume		= ucb1x00_resume,
-	.id_table	= ucb1x00_id,
 };
 
 static int __init ucb1x00_init(void)
diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index 40ec3c118868..38ffbd50a0d2 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c
@@ -382,7 +382,7 @@ static int ucb1x00_ts_add(struct ucb1x00_dev *dev)
 	ts->adcsync = adcsync ? UCB_SYNC : UCB_NOSYNC;
 
 	idev->name       = "Touchscreen panel";
-	idev->id.product = ts->ucb->id->driver_data;
+	idev->id.product = ts->ucb->id;
 	idev->open       = ucb1x00_ts_open;
 	idev->close      = ucb1x00_ts_close;
 
diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h
index 1515e64e3663..ee496708e38b 100644
--- a/include/linux/mfd/mcp.h
+++ b/include/linux/mfd/mcp.h
@@ -10,7 +10,6 @@
 #ifndef MCP_H
 #define MCP_H
 
-#include <linux/mod_devicetable.h>
 #include <mach/dma.h>
 
 struct mcp_ops;
@@ -27,7 +26,7 @@ struct mcp {
 	dma_device_t	dma_telco_rd;
 	dma_device_t	dma_telco_wr;
 	struct device	attached_device;
-	const char	*codec;
+	int		gpio_base;
 };
 
 struct mcp_ops {
@@ -45,11 +44,10 @@ void mcp_reg_write(struct mcp *, unsigned int, unsigned int);
 unsigned int mcp_reg_read(struct mcp *, unsigned int);
 void mcp_enable(struct mcp *);
 void mcp_disable(struct mcp *);
-const struct mcp_device_id *mcp_get_device_id(const struct mcp *mcp);
 #define mcp_get_sclk_rate(mcp)	((mcp)->sclk_rate)
 
 struct mcp *mcp_host_alloc(struct device *, size_t);
-int mcp_host_register(struct mcp *, void *);
+int mcp_host_register(struct mcp *);
 void mcp_host_unregister(struct mcp *);
 
 struct mcp_driver {
@@ -58,7 +56,6 @@ struct mcp_driver {
 	void (*remove)(struct mcp *);
 	int (*suspend)(struct mcp *, pm_message_t);
 	int (*resume)(struct mcp *);
-	const struct mcp_device_id *id_table;
 };
 
 int mcp_driver_register(struct mcp_driver *);
diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h
index bc19e5fb7ea8..4321f044d1e4 100644
--- a/include/linux/mfd/ucb1x00.h
+++ b/include/linux/mfd/ucb1x00.h
@@ -104,9 +104,6 @@
 #define UCB_MODE_DYN_VFLAG_ENA	(1 << 12)
 #define UCB_MODE_AUD_OFF_CAN	(1 << 13)
 
-struct ucb1x00_plat_data {
-	int		gpio_base;
-};
 
 struct ucb1x00_irq {
 	void *devid;
@@ -119,7 +116,7 @@ struct ucb1x00 {
 	unsigned int		irq;
 	struct semaphore	adc_sem;
 	spinlock_t		io_lock;
-	const struct mcp_device_id *id;
+	u16			id;
 	u16			io_dir;
 	u16			io_out;
 	u16			adc_cr;
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index b29e7f6f8fa5..83ac0713ed0a 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -436,17 +436,6 @@ struct spi_device_id {
 			__attribute__((aligned(sizeof(kernel_ulong_t))));
 };
 
-/* mcp */
-
-#define MCP_NAME_SIZE	20
-#define MCP_MODULE_PREFIX "mcp:"
-
-struct mcp_device_id {
-	char name[MCP_NAME_SIZE];
-	kernel_ulong_t driver_data	/* Data private to the driver */
-			__attribute__((aligned(sizeof(kernel_ulong_t))));
-};
-
 /* dmi */
 enum dmi_field {
 	DMI_NONE,
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index c0e14b3f2306..e8c969577768 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -823,16 +823,6 @@ static int do_spi_entry(const char *filename, struct spi_device_id *id,
 }
 ADD_TO_DEVTABLE("spi", struct spi_device_id, do_spi_entry);
 
-/* Looks like: mcp:S */
-static int do_mcp_entry(const char *filename, struct mcp_device_id *id,
-			char *alias)
-{
-	sprintf(alias, MCP_MODULE_PREFIX "%s", id->name);
-
-	return 1;
-}
-ADD_TO_DEVTABLE("mcp", struct mcp_device_id, do_mcp_entry); 
-
 static const struct dmifield {
 	const char *prefix;
 	int field;
-- 
cgit v1.2.3


From 2a7f51a3e08cdaeea78d9e101a0079422a55bbc3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Sat, 21 Jan 2012 09:28:53 +0000
Subject: MFD: mcp-core: fix mcp_priv() to be more type safe

mcp_priv() does unexpected things when passed a void pointer.  Make it
a typed inline function, which ensures that it works correctly in
these cases.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/mfd/mcp.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h
index ee496708e38b..f88c1cc0cb0f 100644
--- a/include/linux/mfd/mcp.h
+++ b/include/linux/mfd/mcp.h
@@ -64,6 +64,9 @@ void mcp_driver_unregister(struct mcp_driver *);
 #define mcp_get_drvdata(mcp)	dev_get_drvdata(&(mcp)->attached_device)
 #define mcp_set_drvdata(mcp,d)	dev_set_drvdata(&(mcp)->attached_device, d)
 
-#define mcp_priv(mcp)		((void *)((mcp)+1))
+static inline void *mcp_priv(struct mcp *mcp)
+{
+	return mcp + 1;
+}
 
 #endif
-- 
cgit v1.2.3


From 93ece0c1a7ace88f10411dbb5643d2aa2fe00ebf Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.co.il>
Date: Thu, 19 Jan 2012 09:45:05 +0000
Subject: mlx4_en: eth statistics modification

In native mode display all available staticstics.
In SRIOV mode on VF display only SW counters statistics,
in SRIOV mode on hypervisor display SW counters and errors (got from FW)
statistics.

Signed-off-by: Eugenia Emantayev <eugenia@mellanox.co.il>
Reviewed-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 66 +++++++++++++++++++------
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c  |  2 +
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h    |  1 +
 drivers/net/ethernet/mellanox/mlx4/port.c       | 21 ++++++++
 include/linux/mlx4/device.h                     |  1 +
 5 files changed, 75 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 53c66869aecd..70346fd7f9c4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -183,10 +183,11 @@ static int mlx4_en_set_wol(struct net_device *netdev,
 static int mlx4_en_get_sset_count(struct net_device *dev, int sset)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
+	int bit_count = hweight64(priv->stats_bitmap);
 
 	switch (sset) {
 	case ETH_SS_STATS:
-		return NUM_ALL_STATS +
+		return (priv->stats_bitmap ? bit_count : NUM_ALL_STATS) +
 			(priv->tx_ring_num + priv->rx_ring_num) * 2;
 	case ETH_SS_TEST:
 		return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags
@@ -201,14 +202,34 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	int index = 0;
-	int i;
+	int i, j = 0;
 
 	spin_lock_bh(&priv->stats_lock);
 
-	for (i = 0; i < NUM_MAIN_STATS; i++)
-		data[index++] = ((unsigned long *) &priv->stats)[i];
-	for (i = 0; i < NUM_PORT_STATS; i++)
-		data[index++] = ((unsigned long *) &priv->port_stats)[i];
+	if (!(priv->stats_bitmap)) {
+		for (i = 0; i < NUM_MAIN_STATS; i++)
+			data[index++] =
+				((unsigned long *) &priv->stats)[i];
+		for (i = 0; i < NUM_PORT_STATS; i++)
+			data[index++] =
+				((unsigned long *) &priv->port_stats)[i];
+		for (i = 0; i < NUM_PKT_STATS; i++)
+			data[index++] =
+				((unsigned long *) &priv->pkstats)[i];
+	} else {
+		for (i = 0; i < NUM_MAIN_STATS; i++) {
+			if ((priv->stats_bitmap >> j) & 1)
+				data[index++] =
+				((unsigned long *) &priv->stats)[i];
+			j++;
+		}
+		for (i = 0; i < NUM_PORT_STATS; i++) {
+			if ((priv->stats_bitmap >> j) & 1)
+				data[index++] =
+				((unsigned long *) &priv->port_stats)[i];
+			j++;
+		}
+	}
 	for (i = 0; i < priv->tx_ring_num; i++) {
 		data[index++] = priv->tx_ring[i].packets;
 		data[index++] = priv->tx_ring[i].bytes;
@@ -217,8 +238,6 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
 		data[index++] = priv->rx_ring[i].packets;
 		data[index++] = priv->rx_ring[i].bytes;
 	}
-	for (i = 0; i < NUM_PKT_STATS; i++)
-		data[index++] = ((unsigned long *) &priv->pkstats)[i];
 	spin_unlock_bh(&priv->stats_lock);
 
 }
@@ -247,11 +266,29 @@ static void mlx4_en_get_strings(struct net_device *dev,
 
 	case ETH_SS_STATS:
 		/* Add main counters */
-		for (i = 0; i < NUM_MAIN_STATS; i++)
-			strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i]);
-		for (i = 0; i< NUM_PORT_STATS; i++)
-			strcpy(data + (index++) * ETH_GSTRING_LEN,
-			main_strings[i + NUM_MAIN_STATS]);
+		if (!priv->stats_bitmap) {
+			for (i = 0; i < NUM_MAIN_STATS; i++)
+				strcpy(data + (index++) * ETH_GSTRING_LEN,
+					main_strings[i]);
+			for (i = 0; i < NUM_PORT_STATS; i++)
+				strcpy(data + (index++) * ETH_GSTRING_LEN,
+					main_strings[i +
+					NUM_MAIN_STATS]);
+			for (i = 0; i < NUM_PKT_STATS; i++)
+				strcpy(data + (index++) * ETH_GSTRING_LEN,
+					main_strings[i +
+					NUM_MAIN_STATS +
+					NUM_PORT_STATS]);
+		} else
+			for (i = 0; i < NUM_MAIN_STATS + NUM_PORT_STATS; i++) {
+				if ((priv->stats_bitmap >> i) & 1) {
+					strcpy(data +
+					       (index++) * ETH_GSTRING_LEN,
+					       main_strings[i]);
+				}
+				if (!(priv->stats_bitmap >> i))
+					break;
+			}
 		for (i = 0; i < priv->tx_ring_num; i++) {
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
 				"tx%d_packets", i);
@@ -264,9 +301,6 @@ static void mlx4_en_get_strings(struct net_device *dev,
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
 				"rx%d_bytes", i);
 		}
-		for (i = 0; i< NUM_PKT_STATS; i++)
-			strcpy(data + (index++) * ETH_GSTRING_LEN,
-			main_strings[i + NUM_MAIN_STATS + NUM_PORT_STATS]);
 		break;
 	}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index be3f4156aaab..467ae5824875 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -702,6 +702,8 @@ int mlx4_en_start_port(struct net_device *dev)
 	/* Schedule multicast task to populate multicast list */
 	queue_work(mdev->workqueue, &priv->mcast_task);
 
+	mlx4_set_stats_bitmap(mdev->dev, &priv->stats_bitmap);
+
 	priv->port_up = true;
 	netif_tx_start_all_queues(dev);
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index f4d189a1290e..35f08840813c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -476,6 +476,7 @@ struct mlx4_en_priv {
 	struct mlx4_en_perf_stats pstats;
 	struct mlx4_en_pkt_stats pkstats;
 	struct mlx4_en_port_stats port_stats;
+	u64 stats_bitmap;
 	char *mc_addrs;
 	int mc_addrs_cnt;
 	struct mlx4_en_stat_out_mbox hw_stats;
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 1a551d69ddcb..f44ae555bf43 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -44,6 +44,11 @@
 #define MLX4_VLAN_VALID		(1u << 31)
 #define MLX4_VLAN_MASK		0xfff
 
+#define MLX4_STATS_TRAFFIC_COUNTERS_MASK	0xfULL
+#define MLX4_STATS_TRAFFIC_DROPS_MASK		0xc0ULL
+#define MLX4_STATS_ERROR_COUNTERS_MASK		0x1ffc30ULL
+#define MLX4_STATS_PORT_COUNTERS_MASK		0x1fe00000ULL
+
 void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table)
 {
 	int i;
@@ -903,3 +908,19 @@ int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
 	return mlx4_common_dump_eth_stats(dev, slave,
 					  vhcr->in_modifier, outbox);
 }
+
+void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap)
+{
+	if (!mlx4_is_mfunc(dev)) {
+		*stats_bitmap = 0;
+		return;
+	}
+
+	*stats_bitmap = (MLX4_STATS_TRAFFIC_COUNTERS_MASK |
+			 MLX4_STATS_TRAFFIC_DROPS_MASK |
+			 MLX4_STATS_PORT_COUNTERS_MASK);
+
+	if (mlx4_is_master(dev))
+		*stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK;
+}
+EXPORT_SYMBOL(mlx4_set_stats_bitmap);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 5c4fe8e5bfe5..aea61905499b 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -621,6 +621,7 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
 int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
 int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn);
 void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn);
+void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap);
 
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
-- 
cgit v1.2.3


From 974c12360dfe6ab01201fe9e708e7755c413f8b6 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 19 Jan 2012 14:42:21 +0000
Subject: tcp: detect loss above high_seq in recovery

Correctly implement a loss detection heuristic: New sequences (above
high_seq) sent during the fast recovery are deemed lost when higher
sequences are SACKed.

Current code does not catch these losses, because tcp_mark_head_lost()
does not check packets beyond high_seq. The fix is straight-forward by
checking packets until the highest sacked packet. In addition, all the
FLAG_DATA_LOST logic are in-effective and redundant and can be removed.

Update the loss heuristic comments. The algorithm above is documented
as heuristic B, but it is redundant too because heuristic A already
covers B.

Note that this change only marks some forward-retransmitted packets LOST.
It does NOT forbid TCP performing further CWR on new losses. A potential
follow-up patch under preparation is to perform another CWR on "new"
losses such as
1) sequence above high_seq is lost (by resetting high_seq to snd_nxt)
2) retransmission is lost.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h |  1 -
 net/ipv4/proc.c      |  1 -
 net/ipv4/tcp_input.c | 41 +++++++++++++++--------------------------
 3 files changed, 15 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index e16557a357e5..c1241c428179 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -192,7 +192,6 @@ enum
 	LINUX_MIB_TCPPARTIALUNDO,		/* TCPPartialUndo */
 	LINUX_MIB_TCPDSACKUNDO,			/* TCPDSACKUndo */
 	LINUX_MIB_TCPLOSSUNDO,			/* TCPLossUndo */
-	LINUX_MIB_TCPLOSS,			/* TCPLoss */
 	LINUX_MIB_TCPLOSTRETRANSMIT,		/* TCPLostRetransmit */
 	LINUX_MIB_TCPRENOFAILURES,		/* TCPRenoFailures */
 	LINUX_MIB_TCPSACKFAILURES,		/* TCPSackFailures */
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3569d8ecaeac..6afc807ee2ad 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -216,7 +216,6 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO),
 	SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO),
 	SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO),
-	SNMP_MIB_ITEM("TCPLoss", LINUX_MIB_TCPLOSS),
 	SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT),
 	SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES),
 	SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2877c3e09587..976034f82320 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_SYN_ACKED		0x10 /* This ACK acknowledged SYN.		*/
 #define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
 #define FLAG_ECE		0x40 /* ECE in this ACK				*/
-#define FLAG_DATA_LOST		0x80 /* SACK detected data lossage.		*/
 #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
 #define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */
 #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
@@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
  * These 6 states form finite state machine, controlled by the following events:
  * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
  * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
- * 3. Loss detection event of one of three flavors:
+ * 3. Loss detection event of two flavors:
  *	A. Scoreboard estimator decided the packet is lost.
  *	   A'. Reno "three dupacks" marks head of queue lost.
- *	   A''. Its FACK modfication, head until snd.fack is lost.
- *	B. SACK arrives sacking data transmitted after never retransmitted
- *	   hole was sent out.
- *	C. SACK arrives sacking SND.NXT at the moment, when the
+ *	   A''. Its FACK modification, head until snd.fack is lost.
+ *	B. SACK arrives sacking SND.NXT at the moment, when the
  *	   segment was retransmitted.
  * 4. D-SACK added new rule: D-SACK changes any tag to S.
  *
@@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
 }
 
 /* Check for lost retransmit. This superb idea is borrowed from "ratehalving".
- * Event "C". Later note: FACK people cheated me again 8), we have to account
+ * Event "B". Later note: FACK people cheated me again 8), we have to account
  * for reordering! Ugly, but should help.
  *
  * Search retransmitted skbs from write_queue that were sent when snd_nxt was
@@ -1844,10 +1841,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 		if (found_dup_sack && ((i + 1) == first_sack_index))
 			next_dup = &sp[i + 1];
 
-		/* Event "B" in the comment above. */
-		if (after(end_seq, tp->high_seq))
-			state.flag |= FLAG_DATA_LOST;
-
 		/* Skip too early cached blocks */
 		while (tcp_sack_cache_ok(tp, cache) &&
 		       !before(start_seq, cache->end_seq))
@@ -2515,8 +2508,11 @@ static void tcp_timeout_skbs(struct sock *sk)
 	tcp_verify_left_out(tp);
 }
 
-/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
- * is against sacked "cnt", otherwise it's against facked "cnt"
+/* Detect loss in event "A" above by marking head of queue up as lost.
+ * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
+ * are considered lost. For RFC3517 SACK, a segment is considered lost if it
+ * has at least tp->reordering SACKed seqments above it; "packets" refers to
+ * the maximum SACKed segments to pass before reaching this limit.
  */
 static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
@@ -2525,6 +2521,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 	int cnt, oldcnt;
 	int err;
 	unsigned int mss;
+	/* Use SACK to deduce losses of new sequences sent during recovery */
+	const u32 loss_high = tcp_is_sack(tp) ?  tp->snd_nxt : tp->high_seq;
 
 	WARN_ON(packets > tp->packets_out);
 	if (tp->lost_skb_hint) {
@@ -2546,7 +2544,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 		tp->lost_skb_hint = skb;
 		tp->lost_cnt_hint = cnt;
 
-		if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
 			break;
 
 		oldcnt = cnt;
@@ -3033,19 +3031,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 	if (tcp_check_sack_reneging(sk, flag))
 		return;
 
-	/* C. Process data loss notification, provided it is valid. */
-	if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
-	    before(tp->snd_una, tp->high_seq) &&
-	    icsk->icsk_ca_state != TCP_CA_Open &&
-	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
-	}
-
-	/* D. Check consistency of the current state. */
+	/* C. Check consistency of the current state. */
 	tcp_verify_left_out(tp);
 
-	/* E. Check state exit conditions. State can be terminated
+	/* D. Check state exit conditions. State can be terminated
 	 *    when high_seq is ACKed. */
 	if (icsk->icsk_ca_state == TCP_CA_Open) {
 		WARN_ON(tp->retrans_out != 0);
@@ -3077,7 +3066,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		}
 	}
 
-	/* F. Process state. */
+	/* E. Process state. */
 	switch (icsk->icsk_ca_state) {
 	case TCP_CA_Recovery:
 		if (!(flag & FLAG_SND_UNA_ADVANCED)) {
-- 
cgit v1.2.3


From 8cfd14ad1eb52e44cb1fe7b47a68126e45e04026 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Fri, 20 Jan 2012 04:57:15 +0000
Subject: cgroup: make sure memcg margin is 0 when over limit

For the memcg sock code, we'll need to register allocations
that are temporarily over limit. Let's make sure that margin
is 0 in this case.

I am keeping this as a separate patch, so that if any weirdness
interaction appears in the future, we can now exactly what caused
it.

Suggested by Johannes Weiner

Signed-off-by: Glauber Costa <glommer@parallels.com>
CC: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
CC: Johannes Weiner <hannes@cmpxchg.org>
CC: Michal Hocko <mhocko@suse.cz>
CC: Tejun Heo <tj@kernel.org>
CC: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/res_counter.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index c9d625ca659e..d06d014afda6 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -142,7 +142,10 @@ static inline unsigned long long res_counter_margin(struct res_counter *cnt)
 	unsigned long flags;
 
 	spin_lock_irqsave(&cnt->lock, flags);
-	margin = cnt->limit - cnt->usage;
+	if (cnt->limit > cnt->usage)
+		margin = cnt->limit - cnt->usage;
+	else
+		margin = 0;
 	spin_unlock_irqrestore(&cnt->lock, flags);
 	return margin;
 }
-- 
cgit v1.2.3


From 0e90b31f4ba77027a7c21cbfc66404df0851ca21 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Fri, 20 Jan 2012 04:57:16 +0000
Subject: net: introduce res_counter_charge_nofail() for socket allocations

There is a case in __sk_mem_schedule(), where an allocation
is beyond the maximum, but yet we are allowed to proceed.
It happens under the following condition:

	sk->sk_wmem_queued + size >= sk->sk_sndbuf

The network code won't revert the allocation in this case,
meaning that at some point later it'll try to do it. Since
this is never communicated to the underlying res_counter
code, there is an inbalance in res_counter uncharge operation.

I see two ways of fixing this:

1) storing the information about those allocations somewhere
   in memcg, and then deducting from that first, before
   we start draining the res_counter,
2) providing a slightly different allocation function for
   the res_counter, that matches the original behavior of
   the network code more closely.

I decided to go for #2 here, believing it to be more elegant,
since #1 would require us to do basically that, but in a more
obscure way.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
CC: Tejun Heo <tj@kernel.org>
CC: Li Zefan <lizf@cn.fujitsu.com>
CC: Laurent Chavey <chavey@google.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/res_counter.h |  6 ++++++
 include/net/sock.h          | 10 ++++------
 kernel/res_counter.c        | 25 +++++++++++++++++++++++++
 net/core/sock.c             |  4 ++--
 4 files changed, 37 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index d06d014afda6..da81af086eaf 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -109,12 +109,18 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent);
  *
  * returns 0 on success and <0 if the counter->usage will exceed the
  * counter->limit _locked call expects the counter->lock to be taken
+ *
+ * charge_nofail works the same, except that it charges the resource
+ * counter unconditionally, and returns < 0 if the after the current
+ * charge we are over limit.
  */
 
 int __must_check res_counter_charge_locked(struct res_counter *counter,
 		unsigned long val);
 int __must_check res_counter_charge(struct res_counter *counter,
 		unsigned long val, struct res_counter **limit_fail_at);
+int __must_check res_counter_charge_nofail(struct res_counter *counter,
+		unsigned long val, struct res_counter **limit_fail_at);
 
 /*
  * uncharge - tell that some portion of the resource is released
diff --git a/include/net/sock.h b/include/net/sock.h
index 0e7a9b05f92b..4c69ac165e6b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1008,9 +1008,8 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot,
 	struct res_counter *fail;
 	int ret;
 
-	ret = res_counter_charge(prot->memory_allocated,
-				 amt << PAGE_SHIFT, &fail);
-
+	ret = res_counter_charge_nofail(prot->memory_allocated,
+					amt << PAGE_SHIFT, &fail);
 	if (ret < 0)
 		*parent_status = OVER_LIMIT;
 }
@@ -1054,12 +1053,11 @@ sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
 }
 
 static inline void
-sk_memory_allocated_sub(struct sock *sk, int amt, int parent_status)
+sk_memory_allocated_sub(struct sock *sk, int amt)
 {
 	struct proto *prot = sk->sk_prot;
 
-	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
-	    parent_status != OVER_LIMIT) /* Otherwise was uncharged already */
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
 		memcg_memory_allocated_sub(sk->sk_cgrp, amt);
 
 	atomic_long_sub(amt, prot->memory_allocated);
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index 6d269cce7aa1..d508363858b3 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -66,6 +66,31 @@ done:
 	return ret;
 }
 
+int res_counter_charge_nofail(struct res_counter *counter, unsigned long val,
+			      struct res_counter **limit_fail_at)
+{
+	int ret, r;
+	unsigned long flags;
+	struct res_counter *c;
+
+	r = ret = 0;
+	*limit_fail_at = NULL;
+	local_irq_save(flags);
+	for (c = counter; c != NULL; c = c->parent) {
+		spin_lock(&c->lock);
+		r = res_counter_charge_locked(c, val);
+		if (r)
+			c->usage += val;
+		spin_unlock(&c->lock);
+		if (r < 0 && ret == 0) {
+			*limit_fail_at = c;
+			ret = r;
+		}
+	}
+	local_irq_restore(flags);
+
+	return ret;
+}
 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 {
 	if (WARN_ON(counter->usage < val))
diff --git a/net/core/sock.c b/net/core/sock.c
index 5c5af9988f94..3e81fd2e3c75 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1827,7 +1827,7 @@ suppress_allocation:
 	/* Alas. Undo changes. */
 	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
 
-	sk_memory_allocated_sub(sk, amt, parent_status);
+	sk_memory_allocated_sub(sk, amt);
 
 	return 0;
 }
@@ -1840,7 +1840,7 @@ EXPORT_SYMBOL(__sk_mem_schedule);
 void __sk_mem_reclaim(struct sock *sk)
 {
 	sk_memory_allocated_sub(sk,
-				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 0);
+				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
 	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
 
 	if (sk_under_memory_pressure(sk) &&
-- 
cgit v1.2.3


From e9c688a3272fd4b659228f3880de8109a94540e2 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Sun, 22 Jan 2012 14:31:15 -0700
Subject: driver core: remove drivers/base/sys.c and include/linux/sysdev.h

Now that all users of 'struct sysdev' are removed from the kernel, we
can safely remove the .h and .c files for this code, to ensure that no
one accidentally starts to use it again.

Many thanks for Kay who did all the hard work here on making this
happen.

Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/Makefile  |   2 +-
 drivers/base/sys.c     | 383 -------------------------------------------------
 include/linux/sysdev.h | 164 ---------------------
 3 files changed, 1 insertion(+), 548 deletions(-)
 delete mode 100644 drivers/base/sys.c
 delete mode 100644 include/linux/sysdev.h

(limited to 'include/linux')

diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 2c8272dd93c4..610f9997a403 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -1,6 +1,6 @@
 # Makefile for the Linux device tree
 
-obj-y			:= core.o sys.o bus.o dd.o syscore.o \
+obj-y			:= core.o bus.o dd.o syscore.o \
 			   driver.o class.o platform.o \
 			   cpu.o firmware.o init.o map.o devres.o \
 			   attribute_container.o transport_class.o \
diff --git a/drivers/base/sys.c b/drivers/base/sys.c
deleted file mode 100644
index 409f5ce78829..000000000000
--- a/drivers/base/sys.c
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * sys.c - pseudo-bus for system 'devices' (cpus, PICs, timers, etc)
- *
- * Copyright (c) 2002-3 Patrick Mochel
- *               2002-3 Open Source Development Lab
- *
- * This file is released under the GPLv2
- *
- * This exports a 'system' bus type.
- * By default, a 'sys' bus gets added to the root of the system. There will
- * always be core system devices. Devices can use sysdev_register() to
- * add themselves as children of the system bus.
- */
-
-#include <linux/sysdev.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/pm.h>
-#include <linux/device.h>
-#include <linux/mutex.h>
-#include <linux/interrupt.h>
-
-#include "base.h"
-
-#define to_sysdev(k) container_of(k, struct sys_device, kobj)
-#define to_sysdev_attr(a) container_of(a, struct sysdev_attribute, attr)
-
-
-static ssize_t
-sysdev_show(struct kobject *kobj, struct attribute *attr, char *buffer)
-{
-	struct sys_device *sysdev = to_sysdev(kobj);
-	struct sysdev_attribute *sysdev_attr = to_sysdev_attr(attr);
-
-	if (sysdev_attr->show)
-		return sysdev_attr->show(sysdev, sysdev_attr, buffer);
-	return -EIO;
-}
-
-
-static ssize_t
-sysdev_store(struct kobject *kobj, struct attribute *attr,
-	     const char *buffer, size_t count)
-{
-	struct sys_device *sysdev = to_sysdev(kobj);
-	struct sysdev_attribute *sysdev_attr = to_sysdev_attr(attr);
-
-	if (sysdev_attr->store)
-		return sysdev_attr->store(sysdev, sysdev_attr, buffer, count);
-	return -EIO;
-}
-
-static const struct sysfs_ops sysfs_ops = {
-	.show	= sysdev_show,
-	.store	= sysdev_store,
-};
-
-static struct kobj_type ktype_sysdev = {
-	.sysfs_ops	= &sysfs_ops,
-};
-
-
-int sysdev_create_file(struct sys_device *s, struct sysdev_attribute *a)
-{
-	return sysfs_create_file(&s->kobj, &a->attr);
-}
-
-
-void sysdev_remove_file(struct sys_device *s, struct sysdev_attribute *a)
-{
-	sysfs_remove_file(&s->kobj, &a->attr);
-}
-
-EXPORT_SYMBOL_GPL(sysdev_create_file);
-EXPORT_SYMBOL_GPL(sysdev_remove_file);
-
-#define to_sysdev_class(k) container_of(k, struct sysdev_class, kset.kobj)
-#define to_sysdev_class_attr(a) container_of(a, \
-	struct sysdev_class_attribute, attr)
-
-static ssize_t sysdev_class_show(struct kobject *kobj, struct attribute *attr,
-				 char *buffer)
-{
-	struct sysdev_class *class = to_sysdev_class(kobj);
-	struct sysdev_class_attribute *class_attr = to_sysdev_class_attr(attr);
-
-	if (class_attr->show)
-		return class_attr->show(class, class_attr, buffer);
-	return -EIO;
-}
-
-static ssize_t sysdev_class_store(struct kobject *kobj, struct attribute *attr,
-				  const char *buffer, size_t count)
-{
-	struct sysdev_class *class = to_sysdev_class(kobj);
-	struct sysdev_class_attribute *class_attr = to_sysdev_class_attr(attr);
-
-	if (class_attr->store)
-		return class_attr->store(class, class_attr, buffer, count);
-	return -EIO;
-}
-
-static const struct sysfs_ops sysfs_class_ops = {
-	.show	= sysdev_class_show,
-	.store	= sysdev_class_store,
-};
-
-static struct kobj_type ktype_sysdev_class = {
-	.sysfs_ops	= &sysfs_class_ops,
-};
-
-int sysdev_class_create_file(struct sysdev_class *c,
-			     struct sysdev_class_attribute *a)
-{
-	return sysfs_create_file(&c->kset.kobj, &a->attr);
-}
-EXPORT_SYMBOL_GPL(sysdev_class_create_file);
-
-void sysdev_class_remove_file(struct sysdev_class *c,
-			      struct sysdev_class_attribute *a)
-{
-	sysfs_remove_file(&c->kset.kobj, &a->attr);
-}
-EXPORT_SYMBOL_GPL(sysdev_class_remove_file);
-
-extern struct kset *system_kset;
-
-int sysdev_class_register(struct sysdev_class *cls)
-{
-	int retval;
-
-	pr_debug("Registering sysdev class '%s'\n", cls->name);
-
-	INIT_LIST_HEAD(&cls->drivers);
-	memset(&cls->kset.kobj, 0x00, sizeof(struct kobject));
-	cls->kset.kobj.parent = &system_kset->kobj;
-	cls->kset.kobj.ktype = &ktype_sysdev_class;
-	cls->kset.kobj.kset = system_kset;
-
-	retval = kobject_set_name(&cls->kset.kobj, "%s", cls->name);
-	if (retval)
-		return retval;
-
-	retval = kset_register(&cls->kset);
-	if (!retval && cls->attrs)
-		retval = sysfs_create_files(&cls->kset.kobj,
-					    (const struct attribute **)cls->attrs);
-	return retval;
-}
-
-void sysdev_class_unregister(struct sysdev_class *cls)
-{
-	pr_debug("Unregistering sysdev class '%s'\n",
-		 kobject_name(&cls->kset.kobj));
-	if (cls->attrs)
-		sysfs_remove_files(&cls->kset.kobj,
-				   (const struct attribute **)cls->attrs);
-	kset_unregister(&cls->kset);
-}
-
-EXPORT_SYMBOL_GPL(sysdev_class_register);
-EXPORT_SYMBOL_GPL(sysdev_class_unregister);
-
-static DEFINE_MUTEX(sysdev_drivers_lock);
-
-/*
- * @dev != NULL means that we're unwinding because some drv->add()
- * failed for some reason. You need to grab sysdev_drivers_lock before
- * calling this.
- */
-static void __sysdev_driver_remove(struct sysdev_class *cls,
-				   struct sysdev_driver *drv,
-				   struct sys_device *from_dev)
-{
-	struct sys_device *dev = from_dev;
-
-	list_del_init(&drv->entry);
-	if (!cls)
-		return;
-
-	if (!drv->remove)
-		goto kset_put;
-
-	if (dev)
-		list_for_each_entry_continue_reverse(dev, &cls->kset.list,
-						     kobj.entry)
-			drv->remove(dev);
-	else
-		list_for_each_entry(dev, &cls->kset.list, kobj.entry)
-			drv->remove(dev);
-
-kset_put:
-	kset_put(&cls->kset);
-}
-
-/**
- *	sysdev_driver_register - Register auxiliary driver
- *	@cls:	Device class driver belongs to.
- *	@drv:	Driver.
- *
- *	@drv is inserted into @cls->drivers to be
- *	called on each operation on devices of that class. The refcount
- *	of @cls is incremented.
- */
-int sysdev_driver_register(struct sysdev_class *cls, struct sysdev_driver *drv)
-{
-	struct sys_device *dev = NULL;
-	int err = 0;
-
-	if (!cls) {
-		WARN(1, KERN_WARNING "sysdev: invalid class passed to %s!\n",
-			__func__);
-		return -EINVAL;
-	}
-
-	/* Check whether this driver has already been added to a class. */
-	if (drv->entry.next && !list_empty(&drv->entry))
-		WARN(1, KERN_WARNING "sysdev: class %s: driver (%p) has already"
-			" been registered to a class, something is wrong, but "
-			"will forge on!\n", cls->name, drv);
-
-	mutex_lock(&sysdev_drivers_lock);
-	if (cls && kset_get(&cls->kset)) {
-		list_add_tail(&drv->entry, &cls->drivers);
-
-		/* If devices of this class already exist, tell the driver */
-		if (drv->add) {
-			list_for_each_entry(dev, &cls->kset.list, kobj.entry) {
-				err = drv->add(dev);
-				if (err)
-					goto unwind;
-			}
-		}
-	} else {
-		err = -EINVAL;
-		WARN(1, KERN_ERR "%s: invalid device class\n", __func__);
-	}
-
-	goto unlock;
-
-unwind:
-	__sysdev_driver_remove(cls, drv, dev);
-
-unlock:
-	mutex_unlock(&sysdev_drivers_lock);
-	return err;
-}
-
-/**
- *	sysdev_driver_unregister - Remove an auxiliary driver.
- *	@cls:	Class driver belongs to.
- *	@drv:	Driver.
- */
-void sysdev_driver_unregister(struct sysdev_class *cls,
-			      struct sysdev_driver *drv)
-{
-	mutex_lock(&sysdev_drivers_lock);
-	__sysdev_driver_remove(cls, drv, NULL);
-	mutex_unlock(&sysdev_drivers_lock);
-}
-EXPORT_SYMBOL_GPL(sysdev_driver_register);
-EXPORT_SYMBOL_GPL(sysdev_driver_unregister);
-
-/**
- *	sysdev_register - add a system device to the tree
- *	@sysdev:	device in question
- *
- */
-int sysdev_register(struct sys_device *sysdev)
-{
-	int error;
-	struct sysdev_class *cls = sysdev->cls;
-
-	if (!cls)
-		return -EINVAL;
-
-	pr_debug("Registering sys device of class '%s'\n",
-		 kobject_name(&cls->kset.kobj));
-
-	/* initialize the kobject to 0, in case it had previously been used */
-	memset(&sysdev->kobj, 0x00, sizeof(struct kobject));
-
-	/* Make sure the kset is set */
-	sysdev->kobj.kset = &cls->kset;
-
-	/* Register the object */
-	error = kobject_init_and_add(&sysdev->kobj, &ktype_sysdev, NULL,
-				     "%s%d", kobject_name(&cls->kset.kobj),
-				     sysdev->id);
-
-	if (!error) {
-		struct sysdev_driver *drv;
-
-		pr_debug("Registering sys device '%s'\n",
-			 kobject_name(&sysdev->kobj));
-
-		mutex_lock(&sysdev_drivers_lock);
-		/* Generic notification is implicit, because it's that
-		 * code that should have called us.
-		 */
-
-		/* Notify class auxiliary drivers */
-		list_for_each_entry(drv, &cls->drivers, entry) {
-			if (drv->add)
-				drv->add(sysdev);
-		}
-		mutex_unlock(&sysdev_drivers_lock);
-		kobject_uevent(&sysdev->kobj, KOBJ_ADD);
-	}
-
-	return error;
-}
-
-void sysdev_unregister(struct sys_device *sysdev)
-{
-	struct sysdev_driver *drv;
-
-	mutex_lock(&sysdev_drivers_lock);
-	list_for_each_entry(drv, &sysdev->cls->drivers, entry) {
-		if (drv->remove)
-			drv->remove(sysdev);
-	}
-	mutex_unlock(&sysdev_drivers_lock);
-
-	kobject_put(&sysdev->kobj);
-}
-
-EXPORT_SYMBOL_GPL(sysdev_register);
-EXPORT_SYMBOL_GPL(sysdev_unregister);
-
-#define to_ext_attr(x) container_of(x, struct sysdev_ext_attribute, attr)
-
-ssize_t sysdev_store_ulong(struct sys_device *sysdev,
-			   struct sysdev_attribute *attr,
-			   const char *buf, size_t size)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	char *end;
-	unsigned long new = simple_strtoul(buf, &end, 0);
-	if (end == buf)
-		return -EINVAL;
-	*(unsigned long *)(ea->var) = new;
-	/* Always return full write size even if we didn't consume all */
-	return size;
-}
-EXPORT_SYMBOL_GPL(sysdev_store_ulong);
-
-ssize_t sysdev_show_ulong(struct sys_device *sysdev,
-			  struct sysdev_attribute *attr,
-			  char *buf)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	return snprintf(buf, PAGE_SIZE, "%lx\n", *(unsigned long *)(ea->var));
-}
-EXPORT_SYMBOL_GPL(sysdev_show_ulong);
-
-ssize_t sysdev_store_int(struct sys_device *sysdev,
-			   struct sysdev_attribute *attr,
-			   const char *buf, size_t size)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	char *end;
-	long new = simple_strtol(buf, &end, 0);
-	if (end == buf || new > INT_MAX || new < INT_MIN)
-		return -EINVAL;
-	*(int *)(ea->var) = new;
-	/* Always return full write size even if we didn't consume all */
-	return size;
-}
-EXPORT_SYMBOL_GPL(sysdev_store_int);
-
-ssize_t sysdev_show_int(struct sys_device *sysdev,
-			  struct sysdev_attribute *attr,
-			  char *buf)
-{
-	struct sysdev_ext_attribute *ea = to_ext_attr(attr);
-	return snprintf(buf, PAGE_SIZE, "%d\n", *(int *)(ea->var));
-}
-EXPORT_SYMBOL_GPL(sysdev_show_int);
-
diff --git a/include/linux/sysdev.h b/include/linux/sysdev.h
deleted file mode 100644
index 20f63d3e6144..000000000000
--- a/include/linux/sysdev.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/**
- * System devices follow a slightly different driver model. 
- * They don't need to do dynammic driver binding, can't be probed, 
- * and don't reside on any type of peripheral bus. 
- * So, we represent and treat them a little differently.
- * 
- * We still have a notion of a driver for a system device, because we still
- * want to perform basic operations on these devices. 
- *
- * We also support auxiliary drivers binding to devices of a certain class.
- * 
- * This allows configurable drivers to register themselves for devices of
- * a certain type. And, it allows class definitions to reside in generic
- * code while arch-specific code can register specific drivers.
- *
- * Auxiliary drivers registered with a NULL cls are registered as drivers
- * for all system devices, and get notification calls for each device. 
- */
-
-
-#ifndef _SYSDEV_H_
-#define _SYSDEV_H_
-
-#include <linux/kobject.h>
-#include <linux/pm.h>
-
-
-struct sys_device;
-struct sysdev_class_attribute;
-
-struct sysdev_class {
-	const char *name;
-	struct list_head	drivers;
-	struct sysdev_class_attribute **attrs;
-	struct kset		kset;
-};
-
-struct sysdev_class_attribute {
-	struct attribute attr;
-	ssize_t (*show)(struct sysdev_class *, struct sysdev_class_attribute *,
-			char *);
-	ssize_t (*store)(struct sysdev_class *, struct sysdev_class_attribute *,
-			 const char *, size_t);
-};
-
-#define _SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) 		\
-{					 			\
-	.attr = {.name = __stringify(_name), .mode = _mode },	\
-	.show	= _show,					\
-	.store	= _store,					\
-}
-
-#define SYSDEV_CLASS_ATTR(_name,_mode,_show,_store) 		\
-	struct sysdev_class_attribute attr_##_name = 		\
-		_SYSDEV_CLASS_ATTR(_name,_mode,_show,_store)
-
-
-extern int sysdev_class_register(struct sysdev_class *);
-extern void sysdev_class_unregister(struct sysdev_class *);
-
-extern int sysdev_class_create_file(struct sysdev_class *,
-	struct sysdev_class_attribute *);
-extern void sysdev_class_remove_file(struct sysdev_class *,
-	struct sysdev_class_attribute *);
-/**
- * Auxiliary system device drivers.
- */
-
-struct sysdev_driver {
-	struct list_head	entry;
-	int	(*add)(struct sys_device *);
-	int	(*remove)(struct sys_device *);
-};
-
-
-extern int sysdev_driver_register(struct sysdev_class *, struct sysdev_driver *);
-extern void sysdev_driver_unregister(struct sysdev_class *, struct sysdev_driver *);
-
-
-/**
- * sys_devices can be simplified a lot from regular devices, because they're
- * simply not as versatile. 
- */
-
-struct sys_device {
-	u32		id;
-	struct sysdev_class	* cls;
-	struct kobject		kobj;
-};
-
-extern int sysdev_register(struct sys_device *);
-extern void sysdev_unregister(struct sys_device *);
-
-
-struct sysdev_attribute { 
-	struct attribute	attr;
-	ssize_t (*show)(struct sys_device *, struct sysdev_attribute *, char *);
-	ssize_t (*store)(struct sys_device *, struct sysdev_attribute *,
-			 const char *, size_t);
-};
-
-
-#define _SYSDEV_ATTR(_name, _mode, _show, _store)		\
-{								\
-	.attr = { .name = __stringify(_name), .mode = _mode },	\
-	.show	= _show,					\
-	.store	= _store,					\
-}
-
-#define SYSDEV_ATTR(_name, _mode, _show, _store)		\
-	struct sysdev_attribute attr_##_name =			\
-		_SYSDEV_ATTR(_name, _mode, _show, _store);
-
-extern int sysdev_create_file(struct sys_device *, struct sysdev_attribute *);
-extern void sysdev_remove_file(struct sys_device *, struct sysdev_attribute *);
-
-/* Create/remove NULL terminated attribute list */
-static inline int
-sysdev_create_files(struct sys_device *d, struct sysdev_attribute **a)
-{
-	return sysfs_create_files(&d->kobj, (const struct attribute **)a);
-}
-
-static inline void
-sysdev_remove_files(struct sys_device *d, struct sysdev_attribute **a)
-{
-	return sysfs_remove_files(&d->kobj, (const struct attribute **)a);
-}
-
-struct sysdev_ext_attribute {
-	struct sysdev_attribute attr;
-	void *var;
-};
-
-/*
- * Support for simple variable sysdev attributes.
- * The pointer to the variable is stored in a sysdev_ext_attribute
- */
-
-/* Add more types as needed */
-
-extern ssize_t sysdev_show_ulong(struct sys_device *, struct sysdev_attribute *,
-				char *);
-extern ssize_t sysdev_store_ulong(struct sys_device *,
-			struct sysdev_attribute *, const char *, size_t);
-extern ssize_t sysdev_show_int(struct sys_device *, struct sysdev_attribute *,
-				char *);
-extern ssize_t sysdev_store_int(struct sys_device *,
-			struct sysdev_attribute *, const char *, size_t);
-
-#define _SYSDEV_ULONG_ATTR(_name, _mode, _var)				\
-	{ _SYSDEV_ATTR(_name, _mode, sysdev_show_ulong, sysdev_store_ulong), \
-	  &(_var) }
-#define SYSDEV_ULONG_ATTR(_name, _mode, _var)			\
-	struct sysdev_ext_attribute attr_##_name = 		\
-		_SYSDEV_ULONG_ATTR(_name, _mode, _var);
-#define _SYSDEV_INT_ATTR(_name, _mode, _var)				\
-	{ _SYSDEV_ATTR(_name, _mode, sysdev_show_int, sysdev_store_int), \
-	  &(_var) }
-#define SYSDEV_INT_ATTR(_name, _mode, _var)			\
-	struct sysdev_ext_attribute attr_##_name = 		\
-		_SYSDEV_INT_ATTR(_name, _mode, _var);
-
-#endif /* _SYSDEV_H_ */
-- 
cgit v1.2.3


From 2d58d7ea9164da59d0ea82fdf80e3ababe52d58c Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Fri, 4 Nov 2011 10:31:04 +0100
Subject: thermal: Rename generate_netlink_event

It doesn't seem right for the thermal subsystem to export a symbol
named generate_netlink_event. This function is thermal-specific and
its name should reflect that fact. Rename it to
thermal_generate_netlink_event.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: R.Durgadoss <durgadoss.r@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 Documentation/thermal/sysfs-api.txt | 2 +-
 drivers/thermal/thermal_sys.c       | 4 ++--
 include/linux/thermal.h             | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index b61e46f449aa..1733ab947a95 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -284,7 +284,7 @@ method, the sys I/F structure will be built like this:
 The framework includes a simple notification mechanism, in the form of a
 netlink event. Netlink socket initialization is done during the _init_
 of the framework. Drivers which intend to use the notification mechanism
-just need to call generate_netlink_event() with two arguments viz
+just need to call thermal_generate_netlink_event() with two arguments viz
 (originator, event). Typically the originator will be an integer assigned
 to a thermal_zone_device when it registers itself with the framework. The
 event will be one of:{THERMAL_AUX0, THERMAL_AUX1, THERMAL_CRITICAL,
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index dd9a5743fa99..220ce7e31cf5 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -1304,7 +1304,7 @@ static struct genl_multicast_group thermal_event_mcgrp = {
 	.name = THERMAL_GENL_MCAST_GROUP_NAME,
 };
 
-int generate_netlink_event(u32 orig, enum events event)
+int thermal_generate_netlink_event(u32 orig, enum events event)
 {
 	struct sk_buff *skb;
 	struct nlattr *attr;
@@ -1363,7 +1363,7 @@ int generate_netlink_event(u32 orig, enum events event)
 
 	return result;
 }
-EXPORT_SYMBOL(generate_netlink_event);
+EXPORT_SYMBOL(thermal_generate_netlink_event);
 
 static int genetlink_init(void)
 {
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 47b4a27e6e97..796f1ff0388c 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -152,9 +152,9 @@ struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
 void thermal_cooling_device_unregister(struct thermal_cooling_device *);
 
 #ifdef CONFIG_NET
-extern int generate_netlink_event(u32 orig, enum events event);
+extern int thermal_generate_netlink_event(u32 orig, enum events event);
 #else
-static inline int generate_netlink_event(u32 orig, enum events event)
+static inline int thermal_generate_netlink_event(u32 orig, enum events event)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 6536e3123e5d3371a6f52e32a3d0694bcc987702 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 20 Jan 2012 14:33:53 -0800
Subject: mm: fix warnings regarding enum migrate_mode

sparc64 allmodconfig:

In file included from include/linux/compat.h:15,
                 from /usr/src/25/arch/sparc/include/asm/siginfo.h:19,
                 from include/linux/signal.h:5,
                 from include/linux/sched.h:73,
                 from arch/sparc/kernel/asm-offsets.c:13:
include/linux/fs.h:618: warning: parameter has incomplete type

It seems that my sparc64 compiler (gcc-3.4.5) doesn't like the forward
declaration of enums.

Fix this by moving the "enum migrate_mode" definition into its own header
file.

Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andy Isaacson <adi@hexapodia.org>
Cc: Nai Xia <nai.xia@gmail.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h           |  2 +-
 include/linux/migrate.h      | 14 +-------------
 include/linux/migrate_mode.h | 16 ++++++++++++++++
 3 files changed, 18 insertions(+), 14 deletions(-)
 create mode 100644 include/linux/migrate_mode.h

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0244082d42c5..4b3a41fe22bf 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -10,6 +10,7 @@
 #include <linux/ioctl.h>
 #include <linux/blk_types.h>
 #include <linux/types.h>
+#include <linux/migrate_mode.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -526,7 +527,6 @@ enum positive_aop_returns {
 struct page;
 struct address_space;
 struct writeback_control;
-enum migrate_mode;
 
 struct iov_iter {
 	const struct iovec *iov;
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index eaf867412f7a..05ed2828a553 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -3,22 +3,10 @@
 
 #include <linux/mm.h>
 #include <linux/mempolicy.h>
+#include <linux/migrate_mode.h>
 
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
-/*
- * MIGRATE_ASYNC means never block
- * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking
- *	on most operations but not ->writepage as the potential stall time
- *	is too significant
- * MIGRATE_SYNC will block when migrating pages
- */
-enum migrate_mode {
-	MIGRATE_ASYNC,
-	MIGRATE_SYNC_LIGHT,
-	MIGRATE_SYNC,
-};
-
 #ifdef CONFIG_MIGRATION
 #define PAGE_MIGRATION 1
 
diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
new file mode 100644
index 000000000000..ebf3d89a3919
--- /dev/null
+++ b/include/linux/migrate_mode.h
@@ -0,0 +1,16 @@
+#ifndef MIGRATE_MODE_H_INCLUDED
+#define MIGRATE_MODE_H_INCLUDED
+/*
+ * MIGRATE_ASYNC means never block
+ * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking
+ *	on most operations but not ->writepage as the potential stall time
+ *	is too significant
+ * MIGRATE_SYNC will block when migrating pages
+ */
+enum migrate_mode {
+	MIGRATE_ASYNC,
+	MIGRATE_SYNC_LIGHT,
+	MIGRATE_SYNC,
+};
+
+#endif		/* MIGRATE_MODE_H_INCLUDED */
-- 
cgit v1.2.3


From cb78edfdcef5259ac9e9088bd63810d21299928d Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Fri, 20 Jan 2012 14:34:16 -0800
Subject: kdump: define KEXEC_NOTE_BYTES arch specific for s390x

kdump only allocates memory for the prstatus ELF note.  For s390x,
besides of prstatus multiple ELF notes for various different register
types are stored.  Therefore the currently allocated memory is not
sufficient.  With this patch the KEXEC_NOTE_BYTES macro can be defined
by architecture code and for s390x it is set to the correct size now.

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/include/asm/kexec.h | 18 ++++++++++++++++++
 include/linux/kexec.h         |  2 ++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index cf4e47b0948c..3f30dac804ea 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -42,6 +42,24 @@
 /* The native architecture */
 #define KEXEC_ARCH KEXEC_ARCH_S390
 
+/*
+ * Size for s390x ELF notes per CPU
+ *
+ * Seven notes plus zero note at the end: prstatus, fpregset, timer,
+ * tod_cmp, tod_reg, control regs, and prefix
+ */
+#define KEXEC_NOTE_BYTES \
+	(ALIGN(sizeof(struct elf_note), 4) * 8 + \
+	 ALIGN(sizeof("CORE"), 4) * 7 + \
+	 ALIGN(sizeof(struct elf_prstatus), 4) + \
+	 ALIGN(sizeof(elf_fpregset_t), 4) + \
+	 ALIGN(sizeof(u64), 4) + \
+	 ALIGN(sizeof(u64), 4) + \
+	 ALIGN(sizeof(u32), 4) + \
+	 ALIGN(sizeof(u64) * 16, 4) + \
+	 ALIGN(sizeof(u32), 4) \
+	)
+
 /* Provide a dummy definition to avoid build failures. */
 static inline void crash_setup_regs(struct pt_regs *newregs,
 					struct pt_regs *oldregs) { }
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 2fa0901219d4..0d7d6a1b172f 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -50,9 +50,11 @@
  * note header.  For kdump, the code in vmcore.c runs in the context
  * of the second kernel to combine them into one note.
  */
+#ifndef KEXEC_NOTE_BYTES
 #define KEXEC_NOTE_BYTES ( (KEXEC_NOTE_HEAD_BYTES * 2) +		\
 			    KEXEC_CORE_NOTE_NAME_BYTES +		\
 			    KEXEC_CORE_NOTE_DESC_BYTES )
+#endif
 
 /*
  * This structure is used to hold the arguments that are used when loading
-- 
cgit v1.2.3


From 245132643e1cfcd145bbc86a716c1818371fcb93 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 20 Jan 2012 14:34:21 -0800
Subject: SHM_UNLOCK: fix Unevictable pages stranded after swap

Commit cc39c6a9bbde ("mm: account skipped entries to avoid looping in
find_get_pages") correctly fixed an infinite loop; but left a problem
that find_get_pages() on shmem would return 0 (appearing to callers to
mean end of tree) when it meets a run of nr_pages swap entries.

The only uses of find_get_pages() on shmem are via pagevec_lookup(),
called from invalidate_mapping_pages(), and from shmctl SHM_UNLOCK's
scan_mapping_unevictable_pages().  The first is already commented, and
not worth worrying about; but the second can leave pages on the
Unevictable list after an unusual sequence of swapping and locking.

Fix that by using shmem_find_get_pages_and_swap() (then ignoring the
swap) instead of pagevec_lookup().

But I don't want to contaminate vmscan.c with shmem internals, nor
shmem.c with LRU locking.  So move scan_mapping_unevictable_pages() into
shmem.c, renaming it shmem_unlock_mapping(); and rename
check_move_unevictable_page() to check_move_unevictable_pages(), looping
down an array of pages, oftentimes under the same lock.

Leave out the "rotate unevictable list" block: that's a leftover from
when this was used for /proc/sys/vm/scan_unevictable_pages, whose flawed
handling involved looking at pages at tail of LRU.

Was there significance to the sequence first ClearPageUnevictable, then
test page_evictable, then SetPageUnevictable here? I think not, we're
under LRU lock, and have no barriers between those.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michel Lespinasse <walken@google.com>
Cc: <stable@vger.kernel.org> [back to 3.1 but will need respins]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shmem_fs.h |   1 +
 include/linux/swap.h     |   2 +-
 ipc/shm.c                |   2 +-
 mm/shmem.c               |  46 +++++++++++++++--
 mm/vmscan.c              | 128 +++++++++++++++--------------------------------
 5 files changed, 83 insertions(+), 96 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index e4c711c6f321..79ab2555b3b0 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -48,6 +48,7 @@ extern struct file *shmem_file_setup(const char *name,
 					loff_t size, unsigned long flags);
 extern int shmem_zero_setup(struct vm_area_struct *);
 extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern void shmem_unlock_mapping(struct address_space *mapping);
 extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 					pgoff_t index, gfp_t gfp_mask);
 extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 06061a7f8e69..3e60228e7299 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -273,7 +273,7 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
 #endif
 
 extern int page_evictable(struct page *page, struct vm_area_struct *vma);
-extern void scan_mapping_unevictable_pages(struct address_space *);
+extern void check_move_unevictable_pages(struct page **, int nr_pages);
 
 extern unsigned long scan_unevictable_pages;
 extern int scan_unevictable_handler(struct ctl_table *, int,
diff --git a/ipc/shm.c b/ipc/shm.c
index 854ab58e5f6e..b76be5bda6c2 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -916,7 +916,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 		shp->mlock_user = NULL;
 		get_file(shm_file);
 		shm_unlock(shp);
-		scan_mapping_unevictable_pages(shm_file->f_mapping);
+		shmem_unlock_mapping(shm_file->f_mapping);
 		fput(shm_file);
 		goto out;
 	}
diff --git a/mm/shmem.c b/mm/shmem.c
index 4aaa53abe302..269d049294ab 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -379,7 +379,7 @@ static int shmem_free_swap(struct address_space *mapping,
 /*
  * Pagevec may contain swap entries, so shuffle up pages before releasing.
  */
-static void shmem_pagevec_release(struct pagevec *pvec)
+static void shmem_deswap_pagevec(struct pagevec *pvec)
 {
 	int i, j;
 
@@ -389,7 +389,36 @@ static void shmem_pagevec_release(struct pagevec *pvec)
 			pvec->pages[j++] = page;
 	}
 	pvec->nr = j;
-	pagevec_release(pvec);
+}
+
+/*
+ * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
+ */
+void shmem_unlock_mapping(struct address_space *mapping)
+{
+	struct pagevec pvec;
+	pgoff_t indices[PAGEVEC_SIZE];
+	pgoff_t index = 0;
+
+	pagevec_init(&pvec, 0);
+	/*
+	 * Minor point, but we might as well stop if someone else SHM_LOCKs it.
+	 */
+	while (!mapping_unevictable(mapping)) {
+		/*
+		 * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
+		 * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
+		 */
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+					PAGEVEC_SIZE, pvec.pages, indices);
+		if (!pvec.nr)
+			break;
+		index = indices[pvec.nr - 1] + 1;
+		shmem_deswap_pagevec(&pvec);
+		check_move_unevictable_pages(pvec.pages, pvec.nr);
+		pagevec_release(&pvec);
+		cond_resched();
+	}
 }
 
 /*
@@ -440,7 +469,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			}
 			unlock_page(page);
 		}
-		shmem_pagevec_release(&pvec);
+		shmem_deswap_pagevec(&pvec);
+		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
@@ -470,7 +500,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			continue;
 		}
 		if (index == start && indices[0] > end) {
-			shmem_pagevec_release(&pvec);
+			shmem_deswap_pagevec(&pvec);
+			pagevec_release(&pvec);
 			break;
 		}
 		mem_cgroup_uncharge_start();
@@ -494,7 +525,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 			}
 			unlock_page(page);
 		}
-		shmem_pagevec_release(&pvec);
+		shmem_deswap_pagevec(&pvec);
+		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		index++;
 	}
@@ -2438,6 +2470,10 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
 	return 0;
 }
 
+void shmem_unlock_mapping(struct address_space *mapping)
+{
+}
+
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
 	truncate_inode_pages_range(inode->i_mapping, lstart, lend);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e097c1026b58..c52b23552659 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -26,7 +26,6 @@
 #include <linux/buffer_head.h>	/* for try_to_release_page(),
 					buffer_heads_over_limit */
 #include <linux/mm_inline.h>
-#include <linux/pagevec.h>
 #include <linux/backing-dev.h>
 #include <linux/rmap.h>
 #include <linux/topology.h>
@@ -661,7 +660,7 @@ redo:
 		 * When racing with an mlock or AS_UNEVICTABLE clearing
 		 * (page is unlocked) make sure that if the other thread
 		 * does not observe our setting of PG_lru and fails
-		 * isolation/check_move_unevictable_page,
+		 * isolation/check_move_unevictable_pages,
 		 * we see PG_mlocked/AS_UNEVICTABLE cleared below and move
 		 * the page back to the evictable list.
 		 *
@@ -3501,107 +3500,58 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
 
 #ifdef CONFIG_SHMEM
 /**
- * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
- * @page: page to check evictability and move to appropriate lru list
- * @zone: zone page is in
+ * check_move_unevictable_pages - check pages for evictability and move to appropriate zone lru list
+ * @pages:	array of pages to check
+ * @nr_pages:	number of pages to check
  *
- * Checks a page for evictability and moves the page to the appropriate
- * zone lru list.
- *
- * Restrictions: zone->lru_lock must be held, page must be on LRU and must
- * have PageUnevictable set.
+ * Checks pages for evictability and moves them to the appropriate lru list.
  *
  * This function is only used for SysV IPC SHM_UNLOCK.
  */
-static void check_move_unevictable_page(struct page *page, struct zone *zone)
+void check_move_unevictable_pages(struct page **pages, int nr_pages)
 {
 	struct lruvec *lruvec;
+	struct zone *zone = NULL;
+	int pgscanned = 0;
+	int pgrescued = 0;
+	int i;
 
-	VM_BUG_ON(PageActive(page));
-retry:
-	ClearPageUnevictable(page);
-	if (page_evictable(page, NULL)) {
-		enum lru_list l = page_lru_base_type(page);
-
-		__dec_zone_state(zone, NR_UNEVICTABLE);
-		lruvec = mem_cgroup_lru_move_lists(zone, page,
-						   LRU_UNEVICTABLE, l);
-		list_move(&page->lru, &lruvec->lists[l]);
-		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
-		__count_vm_event(UNEVICTABLE_PGRESCUED);
-	} else {
-		/*
-		 * rotate unevictable list
-		 */
-		SetPageUnevictable(page);
-		lruvec = mem_cgroup_lru_move_lists(zone, page, LRU_UNEVICTABLE,
-						   LRU_UNEVICTABLE);
-		list_move(&page->lru, &lruvec->lists[LRU_UNEVICTABLE]);
-		if (page_evictable(page, NULL))
-			goto retry;
-	}
-}
-
-/**
- * scan_mapping_unevictable_pages - scan an address space for evictable pages
- * @mapping: struct address_space to scan for evictable pages
- *
- * Scan all pages in mapping.  Check unevictable pages for
- * evictability and move them to the appropriate zone lru list.
- *
- * This function is only used for SysV IPC SHM_UNLOCK.
- */
-void scan_mapping_unevictable_pages(struct address_space *mapping)
-{
-	pgoff_t next = 0;
-	pgoff_t end   = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
-			 PAGE_CACHE_SHIFT;
-	struct zone *zone;
-	struct pagevec pvec;
-
-	if (mapping->nrpages == 0)
-		return;
-
-	pagevec_init(&pvec, 0);
-	while (next < end &&
-		pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
-		int i;
-		int pg_scanned = 0;
-
-		zone = NULL;
-
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			struct page *page = pvec.pages[i];
-			pgoff_t page_index = page->index;
-			struct zone *pagezone = page_zone(page);
+	for (i = 0; i < nr_pages; i++) {
+		struct page *page = pages[i];
+		struct zone *pagezone;
 
-			pg_scanned++;
-			if (page_index > next)
-				next = page_index;
-			next++;
+		pgscanned++;
+		pagezone = page_zone(page);
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock_irq(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock_irq(&zone->lru_lock);
+		}
 
-			if (pagezone != zone) {
-				if (zone)
-					spin_unlock_irq(&zone->lru_lock);
-				zone = pagezone;
-				spin_lock_irq(&zone->lru_lock);
-			}
+		if (!PageLRU(page) || !PageUnevictable(page))
+			continue;
 
-			if (PageLRU(page) && PageUnevictable(page))
-				check_move_unevictable_page(page, zone);
+		if (page_evictable(page, NULL)) {
+			enum lru_list lru = page_lru_base_type(page);
+
+			VM_BUG_ON(PageActive(page));
+			ClearPageUnevictable(page);
+			__dec_zone_state(zone, NR_UNEVICTABLE);
+			lruvec = mem_cgroup_lru_move_lists(zone, page,
+						LRU_UNEVICTABLE, lru);
+			list_move(&page->lru, &lruvec->lists[lru]);
+			__inc_zone_state(zone, NR_INACTIVE_ANON + lru);
+			pgrescued++;
 		}
-		if (zone)
-			spin_unlock_irq(&zone->lru_lock);
-		pagevec_release(&pvec);
+	}
 
-		count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
-		cond_resched();
+	if (zone) {
+		__count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
+		__count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
+		spin_unlock_irq(&zone->lru_lock);
 	}
 }
-#else
-void scan_mapping_unevictable_pages(struct address_space *mapping)
-{
-}
 #endif /* CONFIG_SHMEM */
 
 static void warn_scan_unevictable_pages(void)
-- 
cgit v1.2.3


From 2eda013f4894bc200124f791a56c4defb613a0cc Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:02:51 -0800
Subject: kernel-doc: fix new warnings in device.h

Fix new kernel-doc warnings:

Warning(include/linux/device.h:299): No description found for parameter 'name'
Warning(include/linux/device.h:299): No description found for parameter 'subsys'
Warning(include/linux/device.h:299): No description found for parameter 'node'
Warning(include/linux/device.h:299): No description found for parameter 'add_dev'
Warning(include/linux/device.h:299): No description found for parameter 'remove_dev'
Warning(include/linux/device.h:685): No description found for parameter 'id'
Warning(include/linux/device.h:1009): No description found for parameter '__driver'
Warning(include/linux/device.h:1009): No description found for parameter '__register'
Warning(include/linux/device.h:1009): No description found for parameter '__unregister'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/device.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 5b3adb8f9588..b63fb393aa58 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -279,11 +279,11 @@ struct device *driver_find_device(struct device_driver *drv,
 
 /**
  * struct subsys_interface - interfaces to device functions
- * @name        name of the device function
- * @subsystem   subsytem of the devices to attach to
- * @node        the list of functions registered at the subsystem
- * @add         device hookup to device function handler
- * @remove      device hookup to device function handler
+ * @name:       name of the device function
+ * @subsys:     subsytem of the devices to attach to
+ * @node:       the list of functions registered at the subsystem
+ * @add_dev:    device hookup to device function handler
+ * @remove_dev: device hookup to device function handler
  *
  * Simple interfaces attached to a subsystem. Multiple interfaces can
  * attach to a subsystem and its devices. Unlike drivers, they do not
@@ -612,6 +612,7 @@ struct device_dma_parameters {
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
  * @devt:	For creating the sysfs "dev".
+ * @id:		device instance
  * @devres_lock: Spinlock to protect the resource of the device.
  * @devres_head: The resources list of the device.
  * @knode_class: The node used to add the device to the class list.
@@ -1003,6 +1004,10 @@ extern long sysfs_deprecated;
  * Each module may only use this macro once, and calling it replaces
  * module_init() and module_exit().
  *
+ * @__driver: driver name
+ * @__register: register function for this driver type
+ * @__unregister: unregister function for this driver type
+ *
  * Use this macro to construct bus specific macros for registering
  * drivers, and do not use it on its own.
  */
-- 
cgit v1.2.3


From 4d922612df8bd1202a1f51d95b78aca3d67302cd Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:02:56 -0800
Subject: kernel-doc: fix new warning in usb.h

Fix new kernel-doc warning:

Warning(include/linux/usb.h:1251): No description found for parameter 'num_mapped_sgs'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/usb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 27a4e16d2bf1..69d845739bc2 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1073,6 +1073,7 @@ typedef void (*usb_complete_t)(struct urb *);
  *	which the host controller driver should use in preference to the
  *	transfer_buffer.
  * @sg: scatter gather buffer list
+ * @num_mapped_sgs: (internal) number of mapped sg entries
  * @num_sgs: number of entries in the sg list
  * @transfer_buffer_length: How big is transfer_buffer.  The transfer may
  *	be broken up into chunks according to the current maximum packet
-- 
cgit v1.2.3


From fa757281a08799fd6c0f7ec6f111d1cd66afc97b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:03:13 -0800
Subject: kernel-doc: fix kernel-doc warnings in sched

Fix new kernel-doc notation warnings:

Warning(include/linux/sched.h:2094): No description found for parameter 'p'
Warning(include/linux/sched.h:2094): Excess function parameter 'tsk' description in 'is_idle_task'
Warning(kernel/sched/cpupri.c:139): No description found for parameter 'newpri'
Warning(kernel/sched/cpupri.c:139): Excess function parameter 'pri' description in 'cpupri_set'
Warning(kernel/sched/cpupri.c:208): Excess function parameter 'bootmem' description in 'cpupri_init'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc:	Ingo Molnar <mingo@elte.hu>
Cc:	Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 2 +-
 kernel/sched/cpupri.c | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4032ec1cf836..513f52459872 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2088,7 +2088,7 @@ extern int sched_setscheduler_nocheck(struct task_struct *, int,
 extern struct task_struct *idle_task(int cpu);
 /**
  * is_idle_task - is the specified task an idle task?
- * @tsk: the task in question.
+ * @p: the task in question.
  */
 static inline bool is_idle_task(struct task_struct *p)
 {
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index b0d798eaf130..d72586fdf660 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -129,7 +129,7 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
  * cpupri_set - update the cpu priority setting
  * @cp: The cpupri context
  * @cpu: The target cpu
- * @pri: The priority (INVALID-RT99) to assign to this CPU
+ * @newpri: The priority (INVALID-RT99) to assign to this CPU
  *
  * Note: Assumes cpu_rq(cpu)->lock is locked
  *
@@ -200,7 +200,6 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 /**
  * cpupri_init - initialize the cpupri structure
  * @cp: The cpupri context
- * @bootmem: true if allocations need to use bootmem
  *
  * Returns: -ENOMEM if memory fails.
  */
-- 
cgit v1.2.3


From c1aab02dac690af7ff634d8e1cb3be6a04387eef Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 24 Jan 2012 11:41:32 +1100
Subject: migrate_mode.h is not exported to user mode

so move its include into fs.h inside the __KERNEL__ protection.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4b3a41fe22bf..386da09f229d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -10,7 +10,6 @@
 #include <linux/ioctl.h>
 #include <linux/blk_types.h>
 #include <linux/types.h>
-#include <linux/migrate_mode.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -397,6 +396,7 @@ struct inodes_stat_t {
 #include <linux/rculist_bl.h>
 #include <linux/atomic.h>
 #include <linux/shrinker.h>
+#include <linux/migrate_mode.h>
 
 #include <asm/byteorder.h>
 
-- 
cgit v1.2.3


From c1084a56da255ef5385c0f587e16fdc225a5460f Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Wed, 21 Dec 2011 10:19:38 +0200
Subject: usb: otg: kill langwell_otg driver

The way this driver was added by f0ae849 (usb: Add Intel Langwell USB
OTG Transceiver Driver) never even compiled together with langwell_udc,
and that's the only way for it to be useful.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: stable@vger.kernel.org # v2.6.31+
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Alan Cox <alan@linux.intel.com>
Cc: linux-usb@vger.kernel.org
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/otg/Kconfig          |   14 -
 drivers/usb/otg/Makefile         |    1 -
 drivers/usb/otg/langwell_otg.c   | 2347 --------------------------------------
 include/linux/usb/langwell_otg.h |  139 ---
 4 files changed, 2501 deletions(-)
 delete mode 100644 drivers/usb/otg/langwell_otg.c
 delete mode 100644 include/linux/usb/langwell_otg.h

(limited to 'include/linux')

diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig
index 2a25955881fc..9105c285f594 100644
--- a/drivers/usb/otg/Kconfig
+++ b/drivers/usb/otg/Kconfig
@@ -86,20 +86,6 @@ config NOP_USB_XCEIV
 	  built-in with usb ip or which are autonomous and doesn't require any
 	  phy programming such as ISP1x04 etc.
 
-config USB_LANGWELL_OTG
-	tristate "Intel Langwell USB OTG dual-role support"
-	depends on USB && PCI && INTEL_SCU_IPC
-	select USB_OTG
-	select USB_OTG_UTILS
-	help
-	  Say Y here if you want to build Intel Langwell USB OTG
-	  transciever driver in kernel. This driver implements role
-	  switch between EHCI host driver and Langwell USB OTG
-	  client driver.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called langwell_otg.
-
 config USB_MSM_OTG
 	tristate "OTG support for Qualcomm on-chip USB controller"
 	depends on (USB || USB_GADGET) && ARCH_MSM
diff --git a/drivers/usb/otg/Makefile b/drivers/usb/otg/Makefile
index b2c5a9598637..41aa5098b139 100644
--- a/drivers/usb/otg/Makefile
+++ b/drivers/usb/otg/Makefile
@@ -13,7 +13,6 @@ obj-$(CONFIG_USB_GPIO_VBUS)	+= gpio_vbus.o
 obj-$(CONFIG_ISP1301_OMAP)	+= isp1301_omap.o
 obj-$(CONFIG_TWL4030_USB)	+= twl4030-usb.o
 obj-$(CONFIG_TWL6030_USB)	+= twl6030-usb.o
-obj-$(CONFIG_USB_LANGWELL_OTG)	+= langwell_otg.o
 obj-$(CONFIG_NOP_USB_XCEIV)	+= nop-usb-xceiv.o
 obj-$(CONFIG_USB_ULPI)		+= ulpi.o
 obj-$(CONFIG_USB_ULPI_VIEWPORT)	+= ulpi_viewport.o
diff --git a/drivers/usb/otg/langwell_otg.c b/drivers/usb/otg/langwell_otg.c
deleted file mode 100644
index f08f784086f7..000000000000
--- a/drivers/usb/otg/langwell_otg.c
+++ /dev/null
@@ -1,2347 +0,0 @@
-/*
- * Intel Langwell USB OTG transceiver driver
- * Copyright (C) 2008 - 2010, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- */
-/* This driver helps to switch Langwell OTG controller function between host
- * and peripheral. It works with EHCI driver and Langwell client controller
- * driver together.
- */
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/moduleparam.h>
-#include <linux/usb/ch9.h>
-#include <linux/usb/gadget.h>
-#include <linux/usb.h>
-#include <linux/usb/otg.h>
-#include <linux/usb/hcd.h>
-#include <linux/notifier.h>
-#include <linux/delay.h>
-#include <asm/intel_scu_ipc.h>
-
-#include <linux/usb/langwell_otg.h>
-
-#define	DRIVER_DESC		"Intel Langwell USB OTG transceiver driver"
-#define	DRIVER_VERSION		"July 10, 2010"
-
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_AUTHOR("Henry Yuan <hang.yuan@intel.com>, Hao Wu <hao.wu@intel.com>");
-MODULE_VERSION(DRIVER_VERSION);
-MODULE_LICENSE("GPL");
-
-static const char driver_name[] = "langwell_otg";
-
-static int langwell_otg_probe(struct pci_dev *pdev,
-			const struct pci_device_id *id);
-static void langwell_otg_remove(struct pci_dev *pdev);
-static int langwell_otg_suspend(struct pci_dev *pdev, pm_message_t message);
-static int langwell_otg_resume(struct pci_dev *pdev);
-
-static int langwell_otg_set_host(struct otg_transceiver *otg,
-				struct usb_bus *host);
-static int langwell_otg_set_peripheral(struct otg_transceiver *otg,
-				struct usb_gadget *gadget);
-static int langwell_otg_start_srp(struct otg_transceiver *otg);
-
-static const struct pci_device_id pci_ids[] = {{
-	.class =        ((PCI_CLASS_SERIAL_USB << 8) | 0xfe),
-	.class_mask =   ~0,
-	.vendor =	0x8086,
-	.device =	0x0811,
-	.subvendor =	PCI_ANY_ID,
-	.subdevice =	PCI_ANY_ID,
-}, { /* end: all zeroes */ }
-};
-
-static struct pci_driver otg_pci_driver = {
-	.name =		(char *) driver_name,
-	.id_table =	pci_ids,
-
-	.probe =	langwell_otg_probe,
-	.remove =	langwell_otg_remove,
-
-	.suspend =	langwell_otg_suspend,
-	.resume =	langwell_otg_resume,
-};
-
-/* HSM timers */
-static inline struct langwell_otg_timer *otg_timer_initializer
-(void (*function)(unsigned long), unsigned long expires, unsigned long data)
-{
-	struct langwell_otg_timer *timer;
-	timer = kmalloc(sizeof(struct langwell_otg_timer), GFP_KERNEL);
-	if (timer == NULL)
-		return timer;
-
-	timer->function = function;
-	timer->expires = expires;
-	timer->data = data;
-	return timer;
-}
-
-static struct langwell_otg_timer *a_wait_vrise_tmr, *a_aidl_bdis_tmr,
-	*b_se0_srp_tmr, *b_srp_init_tmr;
-
-static struct list_head active_timers;
-
-static struct langwell_otg *the_transceiver;
-
-/* host/client notify transceiver when event affects HNP state */
-void langwell_update_transceiver(void)
-{
-	struct langwell_otg *lnw = the_transceiver;
-
-	dev_dbg(lnw->dev, "transceiver is updated\n");
-
-	if (!lnw->qwork)
-		return ;
-
-	queue_work(lnw->qwork, &lnw->work);
-}
-EXPORT_SYMBOL(langwell_update_transceiver);
-
-static int langwell_otg_set_host(struct otg_transceiver *otg,
-					struct usb_bus *host)
-{
-	otg->host = host;
-
-	return 0;
-}
-
-static int langwell_otg_set_peripheral(struct otg_transceiver *otg,
-					struct usb_gadget *gadget)
-{
-	otg->gadget = gadget;
-
-	return 0;
-}
-
-static int langwell_otg_set_power(struct otg_transceiver *otg,
-				unsigned mA)
-{
-	return 0;
-}
-
-/* A-device drives vbus, controlled through IPC commands */
-static int langwell_otg_set_vbus(struct otg_transceiver *otg, bool enabled)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	u8				sub_id;
-
-	dev_dbg(lnw->dev, "%s <--- %s\n", __func__, enabled ? "on" : "off");
-
-	if (enabled)
-		sub_id = 0x8; /* Turn on the VBus */
-	else
-		sub_id = 0x9; /* Turn off the VBus */
-
-	if (intel_scu_ipc_simple_command(0xef, sub_id)) {
-		dev_dbg(lnw->dev, "Failed to set Vbus via IPC commands\n");
-		return -EBUSY;
-	}
-
-	dev_dbg(lnw->dev, "%s --->\n", __func__);
-
-	return 0;
-}
-
-/* charge vbus or discharge vbus through a resistor to ground */
-static void langwell_otg_chrg_vbus(int on)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	u32	val;
-
-	val = readl(lnw->iotg.base + CI_OTGSC);
-
-	if (on)
-		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_VC,
-				lnw->iotg.base + CI_OTGSC);
-	else
-		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_VD,
-				lnw->iotg.base + CI_OTGSC);
-}
-
-/* Start SRP */
-static int langwell_otg_start_srp(struct otg_transceiver *otg)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val;
-
-	dev_dbg(lnw->dev, "%s --->\n", __func__);
-
-	val = readl(iotg->base + CI_OTGSC);
-
-	writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HADP,
-				iotg->base + CI_OTGSC);
-
-	/* Check if the data plus is finished or not */
-	msleep(8);
-	val = readl(iotg->base + CI_OTGSC);
-	if (val & (OTGSC_HADP | OTGSC_DP))
-		dev_dbg(lnw->dev, "DataLine SRP Error\n");
-
-	/* Disable interrupt - b_sess_vld */
-	val = readl(iotg->base + CI_OTGSC);
-	val &= (~(OTGSC_BSVIE | OTGSC_BSEIE));
-	writel(val, iotg->base + CI_OTGSC);
-
-	/* Start VBus SRP, drive vbus to generate VBus pulse */
-	iotg->otg.set_vbus(&iotg->otg, true);
-	msleep(15);
-	iotg->otg.set_vbus(&iotg->otg, false);
-
-	/* Enable interrupt - b_sess_vld*/
-	val = readl(iotg->base + CI_OTGSC);
-	dev_dbg(lnw->dev, "after VBUS pulse otgsc = %x\n", val);
-
-	val |= (OTGSC_BSVIE | OTGSC_BSEIE);
-	writel(val, iotg->base + CI_OTGSC);
-
-	/* If Vbus is valid, then update the hsm */
-	if (val & OTGSC_BSV) {
-		dev_dbg(lnw->dev, "no b_sess_vld interrupt\n");
-
-		lnw->iotg.hsm.b_sess_vld = 1;
-		langwell_update_transceiver();
-	}
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-	return 0;
-}
-
-/* stop SOF via bus_suspend */
-static void langwell_otg_loc_sof(int on)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	struct usb_hcd		*hcd;
-	int			err;
-
-	dev_dbg(lnw->dev, "%s ---> %s\n", __func__, on ? "suspend" : "resume");
-
-	hcd = bus_to_hcd(lnw->iotg.otg.host);
-	if (on)
-		err = hcd->driver->bus_resume(hcd);
-	else
-		err = hcd->driver->bus_suspend(hcd);
-
-	if (err)
-		dev_dbg(lnw->dev, "Fail to resume/suspend USB bus - %d\n", err);
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-}
-
-static int langwell_otg_check_otgsc(void)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	u32				otgsc, usbcfg;
-
-	dev_dbg(lnw->dev, "check sync OTGSC and USBCFG registers\n");
-
-	otgsc = readl(lnw->iotg.base + CI_OTGSC);
-	usbcfg = readl(lnw->usbcfg);
-
-	dev_dbg(lnw->dev, "OTGSC = %08x, USBCFG = %08x\n",
-					otgsc, usbcfg);
-	dev_dbg(lnw->dev, "OTGSC_AVV = %d\n", !!(otgsc & OTGSC_AVV));
-	dev_dbg(lnw->dev, "USBCFG.VBUSVAL = %d\n",
-					!!(usbcfg & USBCFG_VBUSVAL));
-	dev_dbg(lnw->dev, "OTGSC_ASV = %d\n", !!(otgsc & OTGSC_ASV));
-	dev_dbg(lnw->dev, "USBCFG.AVALID = %d\n",
-					!!(usbcfg & USBCFG_AVALID));
-	dev_dbg(lnw->dev, "OTGSC_BSV = %d\n", !!(otgsc & OTGSC_BSV));
-	dev_dbg(lnw->dev, "USBCFG.BVALID = %d\n",
-					!!(usbcfg & USBCFG_BVALID));
-	dev_dbg(lnw->dev, "OTGSC_BSE = %d\n", !!(otgsc & OTGSC_BSE));
-	dev_dbg(lnw->dev, "USBCFG.SESEND = %d\n",
-					!!(usbcfg & USBCFG_SESEND));
-
-	/* Check USBCFG VBusValid/AValid/BValid/SessEnd */
-	if (!!(otgsc & OTGSC_AVV) ^ !!(usbcfg & USBCFG_VBUSVAL)) {
-		dev_dbg(lnw->dev, "OTGSC.AVV != USBCFG.VBUSVAL\n");
-		goto err;
-	}
-	if (!!(otgsc & OTGSC_ASV) ^ !!(usbcfg & USBCFG_AVALID)) {
-		dev_dbg(lnw->dev, "OTGSC.ASV != USBCFG.AVALID\n");
-		goto err;
-	}
-	if (!!(otgsc & OTGSC_BSV) ^ !!(usbcfg & USBCFG_BVALID)) {
-		dev_dbg(lnw->dev, "OTGSC.BSV != USBCFG.BVALID\n");
-		goto err;
-	}
-	if (!!(otgsc & OTGSC_BSE) ^ !!(usbcfg & USBCFG_SESEND)) {
-		dev_dbg(lnw->dev, "OTGSC.BSE != USBCFG.SESSEN\n");
-		goto err;
-	}
-
-	dev_dbg(lnw->dev, "OTGSC and USBCFG are synced\n");
-
-	return 0;
-
-err:
-	dev_warn(lnw->dev, "OTGSC isn't equal to USBCFG\n");
-	return -EPIPE;
-}
-
-
-static void langwell_otg_phy_low_power(int on)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u8				val, phcd;
-	int				retval;
-
-	dev_dbg(lnw->dev, "%s ---> %s mode\n",
-			__func__, on ? "Low power" : "Normal");
-
-	phcd = 0x40;
-
-	val = readb(iotg->base + CI_HOSTPC1 + 2);
-
-	if (on) {
-		/* Due to hardware issue, after set PHCD, sync will failed
-		 * between USBCFG and OTGSC, so before set PHCD, check if
-		 * sync is in process now. If the answer is "yes", then do
-		 * not touch PHCD bit */
-		retval = langwell_otg_check_otgsc();
-		if (retval) {
-			dev_dbg(lnw->dev, "Skip PHCD programming..\n");
-			return ;
-		}
-
-		writeb(val | phcd, iotg->base + CI_HOSTPC1 + 2);
-	} else
-		writeb(val & ~phcd, iotg->base + CI_HOSTPC1 + 2);
-
-	dev_dbg(lnw->dev, "%s <--- done\n", __func__);
-}
-
-/* After drv vbus, add 5 ms delay to set PHCD */
-static void langwell_otg_phy_low_power_wait(int on)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-
-	dev_dbg(lnw->dev, "add 5ms delay before programing PHCD\n");
-
-	mdelay(5);
-	langwell_otg_phy_low_power(on);
-}
-
-/* Enable/Disable OTG interrupt */
-static void langwell_otg_intr(int on)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val;
-
-	dev_dbg(lnw->dev, "%s ---> %s\n", __func__, on ? "on" : "off");
-
-	val = readl(iotg->base + CI_OTGSC);
-
-	/* OTGSC_INT_MASK doesn't contains 1msInt */
-	if (on) {
-		val = val | (OTGSC_INT_MASK);
-		writel(val, iotg->base + CI_OTGSC);
-	} else {
-		val = val & ~(OTGSC_INT_MASK);
-		writel(val, iotg->base + CI_OTGSC);
-	}
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-}
-
-/* set HAAR: Hardware Assist Auto-Reset */
-static void langwell_otg_HAAR(int on)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val;
-
-	dev_dbg(lnw->dev, "%s ---> %s\n", __func__, on ? "on" : "off");
-
-	val = readl(iotg->base + CI_OTGSC);
-	if (on)
-		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HAAR,
-					iotg->base + CI_OTGSC);
-	else
-		writel((val & ~OTGSC_INTSTS_MASK) & ~OTGSC_HAAR,
-					iotg->base + CI_OTGSC);
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-}
-
-/* set HABA: Hardware Assist B-Disconnect to A-Connect */
-static void langwell_otg_HABA(int on)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val;
-
-	dev_dbg(lnw->dev, "%s ---> %s\n", __func__, on ? "on" : "off");
-
-	val = readl(iotg->base + CI_OTGSC);
-	if (on)
-		writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HABA,
-					iotg->base + CI_OTGSC);
-	else
-		writel((val & ~OTGSC_INTSTS_MASK) & ~OTGSC_HABA,
-					iotg->base + CI_OTGSC);
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-}
-
-static int langwell_otg_check_se0_srp(int on)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	int			delay_time = TB_SE0_SRP * 10;
-	u32			val;
-
-	dev_dbg(lnw->dev, "%s --->\n", __func__);
-
-	do {
-		udelay(100);
-		if (!delay_time--)
-			break;
-		val = readl(lnw->iotg.base + CI_PORTSC1);
-		val &= PORTSC_LS;
-	} while (!val);
-
-	dev_dbg(lnw->dev, "%s <---\n", __func__);
-	return val;
-}
-
-/* The timeout callback function to set time out bit */
-static void set_tmout(unsigned long indicator)
-{
-	*(int *)indicator = 1;
-}
-
-void langwell_otg_nsf_msg(unsigned long indicator)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-
-	switch (indicator) {
-	case 2:
-	case 4:
-	case 6:
-	case 7:
-		dev_warn(lnw->dev,
-			"OTG:NSF-%lu - deivce not responding\n", indicator);
-		break;
-	case 3:
-		dev_warn(lnw->dev,
-			"OTG:NSF-%lu - deivce not supported\n", indicator);
-		break;
-	default:
-		dev_warn(lnw->dev, "Do not have this kind of NSF\n");
-		break;
-	}
-}
-
-/* Initialize timers */
-static int langwell_otg_init_timers(struct otg_hsm *hsm)
-{
-	/* HSM used timers */
-	a_wait_vrise_tmr = otg_timer_initializer(&set_tmout, TA_WAIT_VRISE,
-				(unsigned long)&hsm->a_wait_vrise_tmout);
-	if (a_wait_vrise_tmr == NULL)
-		return -ENOMEM;
-	a_aidl_bdis_tmr = otg_timer_initializer(&set_tmout, TA_AIDL_BDIS,
-				(unsigned long)&hsm->a_aidl_bdis_tmout);
-	if (a_aidl_bdis_tmr == NULL)
-		return -ENOMEM;
-	b_se0_srp_tmr = otg_timer_initializer(&set_tmout, TB_SE0_SRP,
-				(unsigned long)&hsm->b_se0_srp);
-	if (b_se0_srp_tmr == NULL)
-		return -ENOMEM;
-	b_srp_init_tmr = otg_timer_initializer(&set_tmout, TB_SRP_INIT,
-				(unsigned long)&hsm->b_srp_init_tmout);
-	if (b_srp_init_tmr == NULL)
-		return -ENOMEM;
-
-	return 0;
-}
-
-/* Free timers */
-static void langwell_otg_free_timers(void)
-{
-	kfree(a_wait_vrise_tmr);
-	kfree(a_aidl_bdis_tmr);
-	kfree(b_se0_srp_tmr);
-	kfree(b_srp_init_tmr);
-}
-
-/* The timeout callback function to set time out bit */
-static void langwell_otg_timer_fn(unsigned long indicator)
-{
-	struct langwell_otg *lnw = the_transceiver;
-
-	*(int *)indicator = 1;
-
-	dev_dbg(lnw->dev, "kernel timer - timeout\n");
-
-	langwell_update_transceiver();
-}
-
-/* kernel timer used instead of HW based interrupt */
-static void langwell_otg_add_ktimer(enum langwell_otg_timer_type timers)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	unsigned long		j = jiffies;
-	unsigned long		data, time;
-
-	switch (timers) {
-	case TA_WAIT_VRISE_TMR:
-		iotg->hsm.a_wait_vrise_tmout = 0;
-		data = (unsigned long)&iotg->hsm.a_wait_vrise_tmout;
-		time = TA_WAIT_VRISE;
-		break;
-	case TA_WAIT_BCON_TMR:
-		iotg->hsm.a_wait_bcon_tmout = 0;
-		data = (unsigned long)&iotg->hsm.a_wait_bcon_tmout;
-		time = TA_WAIT_BCON;
-		break;
-	case TA_AIDL_BDIS_TMR:
-		iotg->hsm.a_aidl_bdis_tmout = 0;
-		data = (unsigned long)&iotg->hsm.a_aidl_bdis_tmout;
-		time = TA_AIDL_BDIS;
-		break;
-	case TB_ASE0_BRST_TMR:
-		iotg->hsm.b_ase0_brst_tmout = 0;
-		data = (unsigned long)&iotg->hsm.b_ase0_brst_tmout;
-		time = TB_ASE0_BRST;
-		break;
-	case TB_SRP_INIT_TMR:
-		iotg->hsm.b_srp_init_tmout = 0;
-		data = (unsigned long)&iotg->hsm.b_srp_init_tmout;
-		time = TB_SRP_INIT;
-		break;
-	case TB_SRP_FAIL_TMR:
-		iotg->hsm.b_srp_fail_tmout = 0;
-		data = (unsigned long)&iotg->hsm.b_srp_fail_tmout;
-		time = TB_SRP_FAIL;
-		break;
-	case TB_BUS_SUSPEND_TMR:
-		iotg->hsm.b_bus_suspend_tmout = 0;
-		data = (unsigned long)&iotg->hsm.b_bus_suspend_tmout;
-		time = TB_BUS_SUSPEND;
-		break;
-	default:
-		dev_dbg(lnw->dev, "unknown timer, cannot enable it\n");
-		return;
-	}
-
-	lnw->hsm_timer.data = data;
-	lnw->hsm_timer.function = langwell_otg_timer_fn;
-	lnw->hsm_timer.expires = j + time * HZ / 1000; /* milliseconds */
-
-	add_timer(&lnw->hsm_timer);
-
-	dev_dbg(lnw->dev, "add timer successfully\n");
-}
-
-/* Add timer to timer list */
-static void langwell_otg_add_timer(void *gtimer)
-{
-	struct langwell_otg_timer *timer = (struct langwell_otg_timer *)gtimer;
-	struct langwell_otg_timer *tmp_timer;
-	struct intel_mid_otg_xceiv *iotg = &the_transceiver->iotg;
-	u32	val32;
-
-	/* Check if the timer is already in the active list,
-	 * if so update timer count
-	 */
-	list_for_each_entry(tmp_timer, &active_timers, list)
-		if (tmp_timer == timer) {
-			timer->count = timer->expires;
-			return;
-		}
-	timer->count = timer->expires;
-
-	if (list_empty(&active_timers)) {
-		val32 = readl(iotg->base + CI_OTGSC);
-		writel(val32 | OTGSC_1MSE, iotg->base + CI_OTGSC);
-	}
-
-	list_add_tail(&timer->list, &active_timers);
-}
-
-/* Remove timer from the timer list; clear timeout status */
-static void langwell_otg_del_timer(void *gtimer)
-{
-	struct langwell_otg *lnw = the_transceiver;
-	struct langwell_otg_timer *timer = (struct langwell_otg_timer *)gtimer;
-	struct langwell_otg_timer *tmp_timer, *del_tmp;
-	u32 val32;
-
-	list_for_each_entry_safe(tmp_timer, del_tmp, &active_timers, list)
-		if (tmp_timer == timer)
-			list_del(&timer->list);
-
-	if (list_empty(&active_timers)) {
-		val32 = readl(lnw->iotg.base + CI_OTGSC);
-		writel(val32 & ~OTGSC_1MSE, lnw->iotg.base + CI_OTGSC);
-	}
-}
-
-/* Reduce timer count by 1, and find timeout conditions.*/
-static int langwell_otg_tick_timer(u32 *int_sts)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	struct langwell_otg_timer *tmp_timer, *del_tmp;
-	int expired = 0;
-
-	list_for_each_entry_safe(tmp_timer, del_tmp, &active_timers, list) {
-		tmp_timer->count--;
-		/* check if timer expires */
-		if (!tmp_timer->count) {
-			list_del(&tmp_timer->list);
-			tmp_timer->function(tmp_timer->data);
-			expired = 1;
-		}
-	}
-
-	if (list_empty(&active_timers)) {
-		dev_dbg(lnw->dev, "tick timer: disable 1ms int\n");
-		*int_sts = *int_sts & ~OTGSC_1MSE;
-	}
-	return expired;
-}
-
-static void reset_otg(void)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	int			delay_time = 1000;
-	u32			val;
-
-	dev_dbg(lnw->dev, "reseting OTG controller ...\n");
-	val = readl(lnw->iotg.base + CI_USBCMD);
-	writel(val | USBCMD_RST, lnw->iotg.base + CI_USBCMD);
-	do {
-		udelay(100);
-		if (!delay_time--)
-			dev_dbg(lnw->dev, "reset timeout\n");
-		val = readl(lnw->iotg.base + CI_USBCMD);
-		val &= USBCMD_RST;
-	} while (val != 0);
-	dev_dbg(lnw->dev, "reset done.\n");
-}
-
-static void set_host_mode(void)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	u32			val;
-
-	reset_otg();
-	val = readl(lnw->iotg.base + CI_USBMODE);
-	val = (val & (~USBMODE_CM)) | USBMODE_HOST;
-	writel(val, lnw->iotg.base + CI_USBMODE);
-}
-
-static void set_client_mode(void)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	u32			val;
-
-	reset_otg();
-	val = readl(lnw->iotg.base + CI_USBMODE);
-	val = (val & (~USBMODE_CM)) | USBMODE_DEVICE;
-	writel(val, lnw->iotg.base + CI_USBMODE);
-}
-
-static void init_hsm(void)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val32;
-
-	/* read OTGSC after reset */
-	val32 = readl(lnw->iotg.base + CI_OTGSC);
-	dev_dbg(lnw->dev, "%s: OTGSC init value = 0x%x\n", __func__, val32);
-
-	/* set init state */
-	if (val32 & OTGSC_ID) {
-		iotg->hsm.id = 1;
-		iotg->otg.default_a = 0;
-		set_client_mode();
-		iotg->otg.state = OTG_STATE_B_IDLE;
-	} else {
-		iotg->hsm.id = 0;
-		iotg->otg.default_a = 1;
-		set_host_mode();
-		iotg->otg.state = OTG_STATE_A_IDLE;
-	}
-
-	/* set session indicator */
-	if (val32 & OTGSC_BSE)
-		iotg->hsm.b_sess_end = 1;
-	if (val32 & OTGSC_BSV)
-		iotg->hsm.b_sess_vld = 1;
-	if (val32 & OTGSC_ASV)
-		iotg->hsm.a_sess_vld = 1;
-	if (val32 & OTGSC_AVV)
-		iotg->hsm.a_vbus_vld = 1;
-
-	/* defautly power the bus */
-	iotg->hsm.a_bus_req = 1;
-	iotg->hsm.a_bus_drop = 0;
-	/* defautly don't request bus as B device */
-	iotg->hsm.b_bus_req = 0;
-	/* no system error */
-	iotg->hsm.a_clr_err = 0;
-
-	langwell_otg_phy_low_power_wait(1);
-}
-
-static void update_hsm(void)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				val32;
-
-	/* read OTGSC */
-	val32 = readl(lnw->iotg.base + CI_OTGSC);
-	dev_dbg(lnw->dev, "%s: OTGSC value = 0x%x\n", __func__, val32);
-
-	iotg->hsm.id = !!(val32 & OTGSC_ID);
-	iotg->hsm.b_sess_end = !!(val32 & OTGSC_BSE);
-	iotg->hsm.b_sess_vld = !!(val32 & OTGSC_BSV);
-	iotg->hsm.a_sess_vld = !!(val32 & OTGSC_ASV);
-	iotg->hsm.a_vbus_vld = !!(val32 & OTGSC_AVV);
-}
-
-static irqreturn_t otg_dummy_irq(int irq, void *_dev)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	void __iomem		*reg_base = _dev;
-	u32			val;
-	u32			int_mask = 0;
-
-	val = readl(reg_base + CI_USBMODE);
-	if ((val & USBMODE_CM) != USBMODE_DEVICE)
-		return IRQ_NONE;
-
-	val = readl(reg_base + CI_USBSTS);
-	int_mask = val & INTR_DUMMY_MASK;
-
-	if (int_mask == 0)
-		return IRQ_NONE;
-
-	/* clear hsm.b_conn here since host driver can't detect it
-	*  otg_dummy_irq called means B-disconnect happened.
-	*/
-	if (lnw->iotg.hsm.b_conn) {
-		lnw->iotg.hsm.b_conn = 0;
-		if (spin_trylock(&lnw->wq_lock)) {
-			langwell_update_transceiver();
-			spin_unlock(&lnw->wq_lock);
-		}
-	}
-
-	/* Clear interrupts */
-	writel(int_mask, reg_base + CI_USBSTS);
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t otg_irq(int irq, void *_dev)
-{
-	struct langwell_otg		*lnw = _dev;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	u32				int_sts, int_en;
-	u32				int_mask = 0;
-	int				flag = 0;
-
-	int_sts = readl(lnw->iotg.base + CI_OTGSC);
-	int_en = (int_sts & OTGSC_INTEN_MASK) >> 8;
-	int_mask = int_sts & int_en;
-	if (int_mask == 0)
-		return IRQ_NONE;
-
-	if (int_mask & OTGSC_IDIS) {
-		dev_dbg(lnw->dev, "%s: id change int\n", __func__);
-		iotg->hsm.id = (int_sts & OTGSC_ID) ? 1 : 0;
-		dev_dbg(lnw->dev, "id = %d\n", iotg->hsm.id);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_DPIS) {
-		dev_dbg(lnw->dev, "%s: data pulse int\n", __func__);
-		iotg->hsm.a_srp_det = (int_sts & OTGSC_DPS) ? 1 : 0;
-		dev_dbg(lnw->dev, "data pulse = %d\n", iotg->hsm.a_srp_det);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_BSEIS) {
-		dev_dbg(lnw->dev, "%s: b session end int\n", __func__);
-		iotg->hsm.b_sess_end = (int_sts & OTGSC_BSE) ? 1 : 0;
-		dev_dbg(lnw->dev, "b_sess_end = %d\n", iotg->hsm.b_sess_end);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_BSVIS) {
-		dev_dbg(lnw->dev, "%s: b session valid int\n", __func__);
-		iotg->hsm.b_sess_vld = (int_sts & OTGSC_BSV) ? 1 : 0;
-		dev_dbg(lnw->dev, "b_sess_vld = %d\n", iotg->hsm.b_sess_end);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_ASVIS) {
-		dev_dbg(lnw->dev, "%s: a session valid int\n", __func__);
-		iotg->hsm.a_sess_vld = (int_sts & OTGSC_ASV) ? 1 : 0;
-		dev_dbg(lnw->dev, "a_sess_vld = %d\n", iotg->hsm.a_sess_vld);
-		flag = 1;
-	}
-	if (int_mask & OTGSC_AVVIS) {
-		dev_dbg(lnw->dev, "%s: a vbus valid int\n", __func__);
-		iotg->hsm.a_vbus_vld = (int_sts & OTGSC_AVV) ? 1 : 0;
-		dev_dbg(lnw->dev, "a_vbus_vld = %d\n", iotg->hsm.a_vbus_vld);
-		flag = 1;
-	}
-
-	if (int_mask & OTGSC_1MSS) {
-		/* need to schedule otg_work if any timer is expired */
-		if (langwell_otg_tick_timer(&int_sts))
-			flag = 1;
-	}
-
-	writel((int_sts & ~OTGSC_INTSTS_MASK) | int_mask,
-					lnw->iotg.base + CI_OTGSC);
-	if (flag)
-		langwell_update_transceiver();
-
-	return IRQ_HANDLED;
-}
-
-static int langwell_otg_iotg_notify(struct notifier_block *nb,
-				unsigned long action, void *data)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = data;
-	int				flag = 0;
-
-	if (iotg == NULL)
-		return NOTIFY_BAD;
-
-	if (lnw == NULL)
-		return NOTIFY_BAD;
-
-	switch (action) {
-	case MID_OTG_NOTIFY_CONNECT:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Connect Event\n");
-		if (iotg->otg.default_a == 1)
-			iotg->hsm.b_conn = 1;
-		else
-			iotg->hsm.a_conn = 1;
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_DISCONN:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Disconnect Event\n");
-		if (iotg->otg.default_a == 1)
-			iotg->hsm.b_conn = 0;
-		else
-			iotg->hsm.a_conn = 0;
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_HSUSPEND:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Host Bus suspend Event\n");
-		if (iotg->otg.default_a == 1)
-			iotg->hsm.a_suspend_req = 1;
-		else
-			iotg->hsm.b_bus_req = 0;
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_HRESUME:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Host Bus resume Event\n");
-		if (iotg->otg.default_a == 1)
-			iotg->hsm.b_bus_resume = 1;
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_CSUSPEND:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Client Bus suspend Event\n");
-		if (iotg->otg.default_a == 1) {
-			if (iotg->hsm.b_bus_suspend_vld == 2) {
-				iotg->hsm.b_bus_suspend = 1;
-				iotg->hsm.b_bus_suspend_vld = 0;
-				flag = 1;
-			} else {
-				iotg->hsm.b_bus_suspend_vld++;
-				flag = 0;
-			}
-		} else {
-			if (iotg->hsm.a_bus_suspend == 0) {
-				iotg->hsm.a_bus_suspend = 1;
-				flag = 1;
-			}
-		}
-		break;
-	case MID_OTG_NOTIFY_CRESUME:
-		dev_dbg(lnw->dev, "Lnw OTG Notify Client Bus resume Event\n");
-		if (iotg->otg.default_a == 0)
-			iotg->hsm.a_bus_suspend = 0;
-		flag = 0;
-		break;
-	case MID_OTG_NOTIFY_HOSTADD:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity Host Driver Add\n");
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_HOSTREMOVE:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity Host Driver remove\n");
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_CLIENTADD:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity Client Driver Add\n");
-		flag = 1;
-		break;
-	case MID_OTG_NOTIFY_CLIENTREMOVE:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity Client Driver remove\n");
-		flag = 1;
-		break;
-	default:
-		dev_dbg(lnw->dev, "Lnw OTG Nofity unknown notify message\n");
-		return NOTIFY_DONE;
-	}
-
-	if (flag)
-		langwell_update_transceiver();
-
-	return NOTIFY_OK;
-}
-
-static void langwell_otg_work(struct work_struct *work)
-{
-	struct langwell_otg		*lnw;
-	struct intel_mid_otg_xceiv	*iotg;
-	int				retval;
-	struct pci_dev			*pdev;
-
-	lnw = container_of(work, struct langwell_otg, work);
-	iotg = &lnw->iotg;
-	pdev = to_pci_dev(lnw->dev);
-
-	dev_dbg(lnw->dev, "%s: old state = %s\n", __func__,
-			otg_state_string(iotg->otg.state));
-
-	switch (iotg->otg.state) {
-	case OTG_STATE_UNDEFINED:
-	case OTG_STATE_B_IDLE:
-		if (!iotg->hsm.id) {
-			langwell_otg_del_timer(b_srp_init_tmr);
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			langwell_otg_chrg_vbus(0);
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.b_sess_vld) {
-			langwell_otg_del_timer(b_srp_init_tmr);
-			del_timer_sync(&lnw->hsm_timer);
-			iotg->hsm.b_sess_end = 0;
-			iotg->hsm.a_bus_suspend = 0;
-			langwell_otg_chrg_vbus(0);
-
-			if (lnw->iotg.start_peripheral) {
-				lnw->iotg.start_peripheral(&lnw->iotg);
-				iotg->otg.state = OTG_STATE_B_PERIPHERAL;
-			} else
-				dev_dbg(lnw->dev, "client driver not loaded\n");
-
-		} else if (iotg->hsm.b_srp_init_tmout) {
-			iotg->hsm.b_srp_init_tmout = 0;
-			dev_warn(lnw->dev, "SRP init timeout\n");
-		} else if (iotg->hsm.b_srp_fail_tmout) {
-			iotg->hsm.b_srp_fail_tmout = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			/* No silence failure */
-			langwell_otg_nsf_msg(6);
-		} else if (iotg->hsm.b_bus_req && iotg->hsm.b_sess_end) {
-			del_timer_sync(&lnw->hsm_timer);
-			/* workaround for b_se0_srp detection */
-			retval = langwell_otg_check_se0_srp(0);
-			if (retval) {
-				iotg->hsm.b_bus_req = 0;
-				dev_dbg(lnw->dev, "LS isn't SE0, try later\n");
-			} else {
-				/* clear the PHCD before start srp */
-				langwell_otg_phy_low_power(0);
-
-				/* Start SRP */
-				langwell_otg_add_timer(b_srp_init_tmr);
-				iotg->otg.start_srp(&iotg->otg);
-				langwell_otg_del_timer(b_srp_init_tmr);
-				langwell_otg_add_ktimer(TB_SRP_FAIL_TMR);
-
-				/* reset PHY low power mode here */
-				langwell_otg_phy_low_power_wait(1);
-			}
-		}
-		break;
-	case OTG_STATE_B_SRP_INIT:
-		if (!iotg->hsm.id) {
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_chrg_vbus(0);
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.b_sess_vld) {
-			langwell_otg_chrg_vbus(0);
-			if (lnw->iotg.start_peripheral) {
-				lnw->iotg.start_peripheral(&lnw->iotg);
-				iotg->otg.state = OTG_STATE_B_PERIPHERAL;
-			} else
-				dev_dbg(lnw->dev, "client driver not loaded\n");
-		}
-		break;
-	case OTG_STATE_B_PERIPHERAL:
-		if (!iotg->hsm.id) {
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			langwell_otg_chrg_vbus(0);
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.b_sess_vld) {
-			iotg->hsm.b_hnp_enable = 0;
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			iotg->otg.state = OTG_STATE_B_IDLE;
-		} else if (iotg->hsm.b_bus_req && iotg->otg.gadget &&
-					iotg->otg.gadget->b_hnp_enable &&
-					iotg->hsm.a_bus_suspend) {
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			langwell_otg_HAAR(1);
-			iotg->hsm.a_conn = 0;
-
-			if (lnw->iotg.start_host) {
-				lnw->iotg.start_host(&lnw->iotg);
-				iotg->otg.state = OTG_STATE_B_WAIT_ACON;
-			} else
-				dev_dbg(lnw->dev,
-						"host driver not loaded.\n");
-
-			iotg->hsm.a_bus_resume = 0;
-			langwell_otg_add_ktimer(TB_ASE0_BRST_TMR);
-		}
-		break;
-
-	case OTG_STATE_B_WAIT_ACON:
-		if (!iotg->hsm.id) {
-			/* delete hsm timer for b_ase0_brst_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			langwell_otg_chrg_vbus(0);
-
-			langwell_otg_HAAR(0);
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.b_sess_vld) {
-			/* delete hsm timer for b_ase0_brst_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->hsm.b_hnp_enable = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			langwell_otg_chrg_vbus(0);
-			langwell_otg_HAAR(0);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-		} else if (iotg->hsm.a_conn) {
-			/* delete hsm timer for b_ase0_brst_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			langwell_otg_HAAR(0);
-			iotg->otg.state = OTG_STATE_B_HOST;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_bus_resume ||
-				iotg->hsm.b_ase0_brst_tmout) {
-			/* delete hsm timer for b_ase0_brst_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			langwell_otg_HAAR(0);
-			langwell_otg_nsf_msg(7);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			iotg->hsm.a_bus_suspend = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			if (lnw->iotg.start_peripheral)
-				lnw->iotg.start_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver not loaded.\n");
-
-			iotg->otg.state = OTG_STATE_B_PERIPHERAL;
-		}
-		break;
-
-	case OTG_STATE_B_HOST:
-		if (!iotg->hsm.id) {
-			iotg->otg.default_a = 1;
-			iotg->hsm.a_srp_det = 0;
-
-			langwell_otg_chrg_vbus(0);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.b_sess_vld) {
-			iotg->hsm.b_hnp_enable = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			langwell_otg_chrg_vbus(0);
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-		} else if ((!iotg->hsm.b_bus_req) ||
-				(!iotg->hsm.a_conn)) {
-			iotg->hsm.b_bus_req = 0;
-			langwell_otg_loc_sof(0);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			iotg->hsm.a_bus_suspend = 0;
-
-			if (lnw->iotg.start_peripheral)
-				lnw->iotg.start_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-						"client driver not loaded.\n");
-
-			iotg->otg.state = OTG_STATE_B_PERIPHERAL;
-		}
-		break;
-
-	case OTG_STATE_A_IDLE:
-		iotg->otg.default_a = 1;
-		if (iotg->hsm.id) {
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-			iotg->hsm.vbus_srp_up = 0;
-
-			langwell_otg_chrg_vbus(0);
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_bus_drop &&
-			(iotg->hsm.a_srp_det || iotg->hsm.a_bus_req)) {
-			langwell_otg_phy_low_power(0);
-
-			/* Turn on VBus */
-			iotg->otg.set_vbus(&iotg->otg, true);
-
-			iotg->hsm.vbus_srp_up = 0;
-			iotg->hsm.a_wait_vrise_tmout = 0;
-			langwell_otg_add_timer(a_wait_vrise_tmr);
-			iotg->otg.state = OTG_STATE_A_WAIT_VRISE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_bus_drop && iotg->hsm.a_sess_vld) {
-			iotg->hsm.vbus_srp_up = 1;
-		} else if (!iotg->hsm.a_sess_vld && iotg->hsm.vbus_srp_up) {
-			msleep(10);
-			langwell_otg_phy_low_power(0);
-
-			/* Turn on VBus */
-			iotg->otg.set_vbus(&iotg->otg, true);
-			iotg->hsm.a_srp_det = 1;
-			iotg->hsm.vbus_srp_up = 0;
-			iotg->hsm.a_wait_vrise_tmout = 0;
-			langwell_otg_add_timer(a_wait_vrise_tmr);
-			iotg->otg.state = OTG_STATE_A_WAIT_VRISE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_sess_vld &&
-				!iotg->hsm.vbus_srp_up) {
-			langwell_otg_phy_low_power(1);
-		}
-		break;
-	case OTG_STATE_A_WAIT_VRISE:
-		if (iotg->hsm.id) {
-			langwell_otg_del_timer(a_wait_vrise_tmr);
-			iotg->hsm.b_bus_req = 0;
-			iotg->otg.default_a = 0;
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-		} else if (iotg->hsm.a_vbus_vld) {
-			langwell_otg_del_timer(a_wait_vrise_tmr);
-			iotg->hsm.b_conn = 0;
-			if (lnw->iotg.start_host)
-				lnw->iotg.start_host(&lnw->iotg);
-			else {
-				dev_dbg(lnw->dev, "host driver not loaded.\n");
-				break;
-			}
-
-			langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-			iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-		} else if (iotg->hsm.a_wait_vrise_tmout) {
-			iotg->hsm.b_conn = 0;
-			if (iotg->hsm.a_vbus_vld) {
-				if (lnw->iotg.start_host)
-					lnw->iotg.start_host(&lnw->iotg);
-				else {
-					dev_dbg(lnw->dev,
-						"host driver not loaded.\n");
-					break;
-				}
-				langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-				iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-			} else {
-
-				/* Turn off VBus */
-				iotg->otg.set_vbus(&iotg->otg, false);
-				langwell_otg_phy_low_power_wait(1);
-				iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-			}
-		}
-		break;
-	case OTG_STATE_A_WAIT_BCON:
-		if (iotg->hsm.id) {
-			/* delete hsm timer for a_wait_bcon_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_vbus_vld) {
-			/* delete hsm timer for a_wait_bcon_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-		} else if (iotg->hsm.a_bus_drop ||
-				(iotg->hsm.a_wait_bcon_tmout &&
-				!iotg->hsm.a_bus_req)) {
-			/* delete hsm timer for a_wait_bcon_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-		} else if (iotg->hsm.b_conn) {
-			/* delete hsm timer for a_wait_bcon_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			iotg->hsm.a_suspend_req = 0;
-			iotg->otg.state = OTG_STATE_A_HOST;
-			if (iotg->hsm.a_srp_det && iotg->otg.host &&
-					!iotg->otg.host->b_hnp_enable) {
-				/* SRP capable peripheral-only device */
-				iotg->hsm.a_bus_req = 1;
-				iotg->hsm.a_srp_det = 0;
-			} else if (!iotg->hsm.a_bus_req && iotg->otg.host &&
-					iotg->otg.host->b_hnp_enable) {
-				/* It is not safe enough to do a fast
-				 * transition from A_WAIT_BCON to
-				 * A_SUSPEND */
-				msleep(10000);
-				if (iotg->hsm.a_bus_req)
-					break;
-
-				if (request_irq(pdev->irq,
-					otg_dummy_irq, IRQF_SHARED,
-					driver_name, iotg->base) != 0) {
-					dev_dbg(lnw->dev,
-						"request interrupt %d fail\n",
-						pdev->irq);
-				}
-
-				langwell_otg_HABA(1);
-				iotg->hsm.b_bus_resume = 0;
-				iotg->hsm.a_aidl_bdis_tmout = 0;
-
-				langwell_otg_loc_sof(0);
-				/* clear PHCD to enable HW timer */
-				langwell_otg_phy_low_power(0);
-				langwell_otg_add_timer(a_aidl_bdis_tmr);
-				iotg->otg.state = OTG_STATE_A_SUSPEND;
-			} else if (!iotg->hsm.a_bus_req && iotg->otg.host &&
-				!iotg->otg.host->b_hnp_enable) {
-				if (lnw->iotg.stop_host)
-					lnw->iotg.stop_host(&lnw->iotg);
-				else
-					dev_dbg(lnw->dev,
-						"host driver removed.\n");
-
-				/* Turn off VBus */
-				iotg->otg.set_vbus(&iotg->otg, false);
-				iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-			}
-		}
-		break;
-	case OTG_STATE_A_HOST:
-		if (iotg->hsm.id) {
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_bus_drop ||
-				(iotg->otg.host &&
-				!iotg->otg.host->b_hnp_enable &&
-					!iotg->hsm.a_bus_req)) {
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-		} else if (!iotg->hsm.a_vbus_vld) {
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-		} else if (iotg->otg.host &&
-				iotg->otg.host->b_hnp_enable &&
-				!iotg->hsm.a_bus_req) {
-			/* Set HABA to enable hardware assistance to signal
-			 *  A-connect after receiver B-disconnect. Hardware
-			 *  will then set client mode and enable URE, SLE and
-			 *  PCE after the assistance. otg_dummy_irq is used to
-			 *  clean these ints when client driver is not resumed.
-			 */
-			if (request_irq(pdev->irq, otg_dummy_irq, IRQF_SHARED,
-					driver_name, iotg->base) != 0) {
-				dev_dbg(lnw->dev,
-					"request interrupt %d failed\n",
-						pdev->irq);
-			}
-
-			/* set HABA */
-			langwell_otg_HABA(1);
-			iotg->hsm.b_bus_resume = 0;
-			iotg->hsm.a_aidl_bdis_tmout = 0;
-			langwell_otg_loc_sof(0);
-			/* clear PHCD to enable HW timer */
-			langwell_otg_phy_low_power(0);
-			langwell_otg_add_timer(a_aidl_bdis_tmr);
-			iotg->otg.state = OTG_STATE_A_SUSPEND;
-		} else if (!iotg->hsm.b_conn || !iotg->hsm.a_bus_req) {
-			langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-			iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-		}
-		break;
-	case OTG_STATE_A_SUSPEND:
-		if (iotg->hsm.id) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_bus_req ||
-				iotg->hsm.b_bus_resume) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-			iotg->hsm.a_suspend_req = 0;
-			langwell_otg_loc_sof(1);
-			iotg->otg.state = OTG_STATE_A_HOST;
-		} else if (iotg->hsm.a_aidl_bdis_tmout ||
-				iotg->hsm.a_bus_drop) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-		} else if (!iotg->hsm.b_conn && iotg->otg.host &&
-				iotg->otg.host->b_hnp_enable) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			iotg->hsm.b_bus_suspend = 0;
-			iotg->hsm.b_bus_suspend_vld = 0;
-
-			/* msleep(200); */
-			if (lnw->iotg.start_peripheral)
-				lnw->iotg.start_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver not loaded.\n");
-
-			langwell_otg_add_ktimer(TB_BUS_SUSPEND_TMR);
-			iotg->otg.state = OTG_STATE_A_PERIPHERAL;
-			break;
-		} else if (!iotg->hsm.a_vbus_vld) {
-			langwell_otg_del_timer(a_aidl_bdis_tmr);
-			langwell_otg_HABA(0);
-			free_irq(pdev->irq, iotg->base);
-			if (lnw->iotg.stop_host)
-				lnw->iotg.stop_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"host driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-		}
-		break;
-	case OTG_STATE_A_PERIPHERAL:
-		if (iotg->hsm.id) {
-			/* delete hsm timer for b_bus_suspend_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-			iotg->otg.default_a = 0;
-			iotg->hsm.b_bus_req = 0;
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			set_client_mode();
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (!iotg->hsm.a_vbus_vld) {
-			/* delete hsm timer for b_bus_suspend_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			langwell_otg_phy_low_power_wait(1);
-			iotg->otg.state = OTG_STATE_A_VBUS_ERR;
-		} else if (iotg->hsm.a_bus_drop) {
-			/* delete hsm timer for b_bus_suspend_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			/* Turn off VBus */
-			iotg->otg.set_vbus(&iotg->otg, false);
-			iotg->otg.state = OTG_STATE_A_WAIT_VFALL;
-		} else if (iotg->hsm.b_bus_suspend) {
-			/* delete hsm timer for b_bus_suspend_tmr */
-			del_timer_sync(&lnw->hsm_timer);
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			if (lnw->iotg.start_host)
-				lnw->iotg.start_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-						"host driver not loaded.\n");
-			langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-			iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-		} else if (iotg->hsm.b_bus_suspend_tmout) {
-			u32	val;
-			val = readl(lnw->iotg.base + CI_PORTSC1);
-			if (!(val & PORTSC_SUSP))
-				break;
-
-			if (lnw->iotg.stop_peripheral)
-				lnw->iotg.stop_peripheral(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-					"client driver has been removed.\n");
-
-			if (lnw->iotg.start_host)
-				lnw->iotg.start_host(&lnw->iotg);
-			else
-				dev_dbg(lnw->dev,
-						"host driver not loaded.\n");
-			langwell_otg_add_ktimer(TA_WAIT_BCON_TMR);
-			iotg->otg.state = OTG_STATE_A_WAIT_BCON;
-		}
-		break;
-	case OTG_STATE_A_VBUS_ERR:
-		if (iotg->hsm.id) {
-			iotg->otg.default_a = 0;
-			iotg->hsm.a_clr_err = 0;
-			iotg->hsm.a_srp_det = 0;
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_clr_err) {
-			iotg->hsm.a_clr_err = 0;
-			iotg->hsm.a_srp_det = 0;
-			reset_otg();
-			init_hsm();
-			if (iotg->otg.state == OTG_STATE_A_IDLE)
-				langwell_update_transceiver();
-		} else {
-			/* FW will clear PHCD bit when any VBus
-			 * event detected. Reset PHCD to 1 again */
-			langwell_otg_phy_low_power(1);
-		}
-		break;
-	case OTG_STATE_A_WAIT_VFALL:
-		if (iotg->hsm.id) {
-			iotg->otg.default_a = 0;
-			set_client_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_B_IDLE;
-			langwell_update_transceiver();
-		} else if (iotg->hsm.a_bus_req) {
-
-			/* Turn on VBus */
-			iotg->otg.set_vbus(&iotg->otg, true);
-			iotg->hsm.a_wait_vrise_tmout = 0;
-			langwell_otg_add_timer(a_wait_vrise_tmr);
-			iotg->otg.state = OTG_STATE_A_WAIT_VRISE;
-		} else if (!iotg->hsm.a_sess_vld) {
-			iotg->hsm.a_srp_det = 0;
-			set_host_mode();
-			langwell_otg_phy_low_power(1);
-			iotg->otg.state = OTG_STATE_A_IDLE;
-		}
-		break;
-	default:
-		;
-	}
-
-	dev_dbg(lnw->dev, "%s: new state = %s\n", __func__,
-			otg_state_string(iotg->otg.state));
-}
-
-static ssize_t
-show_registers(struct device *_dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	char			*next;
-	unsigned		size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	t = scnprintf(next, size,
-		"\n"
-		"USBCMD = 0x%08x\n"
-		"USBSTS = 0x%08x\n"
-		"USBINTR = 0x%08x\n"
-		"ASYNCLISTADDR = 0x%08x\n"
-		"PORTSC1 = 0x%08x\n"
-		"HOSTPC1 = 0x%08x\n"
-		"OTGSC = 0x%08x\n"
-		"USBMODE = 0x%08x\n",
-		readl(lnw->iotg.base + 0x30),
-		readl(lnw->iotg.base + 0x34),
-		readl(lnw->iotg.base + 0x38),
-		readl(lnw->iotg.base + 0x48),
-		readl(lnw->iotg.base + 0x74),
-		readl(lnw->iotg.base + 0xb4),
-		readl(lnw->iotg.base + 0xf4),
-		readl(lnw->iotg.base + 0xf8)
-	     );
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-static DEVICE_ATTR(registers, S_IRUGO, show_registers, NULL);
-
-static ssize_t
-show_hsm(struct device *_dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	char				*next;
-	unsigned			size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	if (iotg->otg.host)
-		iotg->hsm.a_set_b_hnp_en = iotg->otg.host->b_hnp_enable;
-
-	if (iotg->otg.gadget)
-		iotg->hsm.b_hnp_enable = iotg->otg.gadget->b_hnp_enable;
-
-	t = scnprintf(next, size,
-		"\n"
-		"current state = %s\n"
-		"a_bus_resume = \t%d\n"
-		"a_bus_suspend = \t%d\n"
-		"a_conn = \t%d\n"
-		"a_sess_vld = \t%d\n"
-		"a_srp_det = \t%d\n"
-		"a_vbus_vld = \t%d\n"
-		"b_bus_resume = \t%d\n"
-		"b_bus_suspend = \t%d\n"
-		"b_conn = \t%d\n"
-		"b_se0_srp = \t%d\n"
-		"b_sess_end = \t%d\n"
-		"b_sess_vld = \t%d\n"
-		"id = \t%d\n"
-		"a_set_b_hnp_en = \t%d\n"
-		"b_srp_done = \t%d\n"
-		"b_hnp_enable = \t%d\n"
-		"a_wait_vrise_tmout = \t%d\n"
-		"a_wait_bcon_tmout = \t%d\n"
-		"a_aidl_bdis_tmout = \t%d\n"
-		"b_ase0_brst_tmout = \t%d\n"
-		"a_bus_drop = \t%d\n"
-		"a_bus_req = \t%d\n"
-		"a_clr_err = \t%d\n"
-		"a_suspend_req = \t%d\n"
-		"b_bus_req = \t%d\n"
-		"b_bus_suspend_tmout = \t%d\n"
-		"b_bus_suspend_vld = \t%d\n",
-		otg_state_string(iotg->otg.state),
-		iotg->hsm.a_bus_resume,
-		iotg->hsm.a_bus_suspend,
-		iotg->hsm.a_conn,
-		iotg->hsm.a_sess_vld,
-		iotg->hsm.a_srp_det,
-		iotg->hsm.a_vbus_vld,
-		iotg->hsm.b_bus_resume,
-		iotg->hsm.b_bus_suspend,
-		iotg->hsm.b_conn,
-		iotg->hsm.b_se0_srp,
-		iotg->hsm.b_sess_end,
-		iotg->hsm.b_sess_vld,
-		iotg->hsm.id,
-		iotg->hsm.a_set_b_hnp_en,
-		iotg->hsm.b_srp_done,
-		iotg->hsm.b_hnp_enable,
-		iotg->hsm.a_wait_vrise_tmout,
-		iotg->hsm.a_wait_bcon_tmout,
-		iotg->hsm.a_aidl_bdis_tmout,
-		iotg->hsm.b_ase0_brst_tmout,
-		iotg->hsm.a_bus_drop,
-		iotg->hsm.a_bus_req,
-		iotg->hsm.a_clr_err,
-		iotg->hsm.a_suspend_req,
-		iotg->hsm.b_bus_req,
-		iotg->hsm.b_bus_suspend_tmout,
-		iotg->hsm.b_bus_suspend_vld
-		);
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-static DEVICE_ATTR(hsm, S_IRUGO, show_hsm, NULL);
-
-static ssize_t
-get_a_bus_req(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	char			*next;
-	unsigned		size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	t = scnprintf(next, size, "%d", lnw->iotg.hsm.a_bus_req);
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-
-static ssize_t
-set_a_bus_req(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-
-	if (!iotg->otg.default_a)
-		return -1;
-	if (count > 2)
-		return -1;
-
-	if (buf[0] == '0') {
-		iotg->hsm.a_bus_req = 0;
-		dev_dbg(lnw->dev, "User request: a_bus_req = 0\n");
-	} else if (buf[0] == '1') {
-		/* If a_bus_drop is TRUE, a_bus_req can't be set */
-		if (iotg->hsm.a_bus_drop)
-			return -1;
-		iotg->hsm.a_bus_req = 1;
-		dev_dbg(lnw->dev, "User request: a_bus_req = 1\n");
-	}
-	if (spin_trylock(&lnw->wq_lock)) {
-		langwell_update_transceiver();
-		spin_unlock(&lnw->wq_lock);
-	}
-	return count;
-}
-static DEVICE_ATTR(a_bus_req, S_IRUGO | S_IWUSR, get_a_bus_req, set_a_bus_req);
-
-static ssize_t
-get_a_bus_drop(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	char			*next;
-	unsigned		size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	t = scnprintf(next, size, "%d", lnw->iotg.hsm.a_bus_drop);
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-
-static ssize_t
-set_a_bus_drop(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-
-	if (!iotg->otg.default_a)
-		return -1;
-	if (count > 2)
-		return -1;
-
-	if (buf[0] == '0') {
-		iotg->hsm.a_bus_drop = 0;
-		dev_dbg(lnw->dev, "User request: a_bus_drop = 0\n");
-	} else if (buf[0] == '1') {
-		iotg->hsm.a_bus_drop = 1;
-		iotg->hsm.a_bus_req = 0;
-		dev_dbg(lnw->dev, "User request: a_bus_drop = 1\n");
-		dev_dbg(lnw->dev, "User request: and a_bus_req = 0\n");
-	}
-	if (spin_trylock(&lnw->wq_lock)) {
-		langwell_update_transceiver();
-		spin_unlock(&lnw->wq_lock);
-	}
-	return count;
-}
-static DEVICE_ATTR(a_bus_drop, S_IRUGO | S_IWUSR, get_a_bus_drop, set_a_bus_drop);
-
-static ssize_t
-get_b_bus_req(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	char			*next;
-	unsigned		size, t;
-
-	next = buf;
-	size = PAGE_SIZE;
-
-	t = scnprintf(next, size, "%d", lnw->iotg.hsm.b_bus_req);
-	size -= t;
-	next += t;
-
-	return PAGE_SIZE - size;
-}
-
-static ssize_t
-set_b_bus_req(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-
-	if (iotg->otg.default_a)
-		return -1;
-
-	if (count > 2)
-		return -1;
-
-	if (buf[0] == '0') {
-		iotg->hsm.b_bus_req = 0;
-		dev_dbg(lnw->dev, "User request: b_bus_req = 0\n");
-	} else if (buf[0] == '1') {
-		iotg->hsm.b_bus_req = 1;
-		dev_dbg(lnw->dev, "User request: b_bus_req = 1\n");
-	}
-	if (spin_trylock(&lnw->wq_lock)) {
-		langwell_update_transceiver();
-		spin_unlock(&lnw->wq_lock);
-	}
-	return count;
-}
-static DEVICE_ATTR(b_bus_req, S_IRUGO | S_IWUSR, get_b_bus_req, set_b_bus_req);
-
-static ssize_t
-set_a_clr_err(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-
-	if (!iotg->otg.default_a)
-		return -1;
-	if (count > 2)
-		return -1;
-
-	if (buf[0] == '1') {
-		iotg->hsm.a_clr_err = 1;
-		dev_dbg(lnw->dev, "User request: a_clr_err = 1\n");
-	}
-	if (spin_trylock(&lnw->wq_lock)) {
-		langwell_update_transceiver();
-		spin_unlock(&lnw->wq_lock);
-	}
-	return count;
-}
-static DEVICE_ATTR(a_clr_err, S_IWUSR, NULL, set_a_clr_err);
-
-static struct attribute *inputs_attrs[] = {
-	&dev_attr_a_bus_req.attr,
-	&dev_attr_a_bus_drop.attr,
-	&dev_attr_b_bus_req.attr,
-	&dev_attr_a_clr_err.attr,
-	NULL,
-};
-
-static struct attribute_group debug_dev_attr_group = {
-	.name = "inputs",
-	.attrs = inputs_attrs,
-};
-
-static int langwell_otg_probe(struct pci_dev *pdev,
-		const struct pci_device_id *id)
-{
-	unsigned long		resource, len;
-	void __iomem		*base = NULL;
-	int			retval;
-	u32			val32;
-	struct langwell_otg	*lnw;
-	char			qname[] = "langwell_otg_queue";
-
-	retval = 0;
-	dev_dbg(&pdev->dev, "\notg controller is detected.\n");
-	if (pci_enable_device(pdev) < 0) {
-		retval = -ENODEV;
-		goto done;
-	}
-
-	lnw = kzalloc(sizeof *lnw, GFP_KERNEL);
-	if (lnw == NULL) {
-		retval = -ENOMEM;
-		goto done;
-	}
-	the_transceiver = lnw;
-
-	/* control register: BAR 0 */
-	resource = pci_resource_start(pdev, 0);
-	len = pci_resource_len(pdev, 0);
-	if (!request_mem_region(resource, len, driver_name)) {
-		retval = -EBUSY;
-		goto err;
-	}
-	lnw->region = 1;
-
-	base = ioremap_nocache(resource, len);
-	if (base == NULL) {
-		retval = -EFAULT;
-		goto err;
-	}
-	lnw->iotg.base = base;
-
-	if (!request_mem_region(USBCFG_ADDR, USBCFG_LEN, driver_name)) {
-		retval = -EBUSY;
-		goto err;
-	}
-	lnw->cfg_region = 1;
-
-	/* For the SCCB.USBCFG register */
-	base = ioremap_nocache(USBCFG_ADDR, USBCFG_LEN);
-	if (base == NULL) {
-		retval = -EFAULT;
-		goto err;
-	}
-	lnw->usbcfg = base;
-
-	if (!pdev->irq) {
-		dev_dbg(&pdev->dev, "No IRQ.\n");
-		retval = -ENODEV;
-		goto err;
-	}
-
-	lnw->qwork = create_singlethread_workqueue(qname);
-	if (!lnw->qwork) {
-		dev_dbg(&pdev->dev, "cannot create workqueue %s\n", qname);
-		retval = -ENOMEM;
-		goto err;
-	}
-	INIT_WORK(&lnw->work, langwell_otg_work);
-
-	/* OTG common part */
-	lnw->dev = &pdev->dev;
-	lnw->iotg.otg.dev = lnw->dev;
-	lnw->iotg.otg.label = driver_name;
-	lnw->iotg.otg.set_host = langwell_otg_set_host;
-	lnw->iotg.otg.set_peripheral = langwell_otg_set_peripheral;
-	lnw->iotg.otg.set_power = langwell_otg_set_power;
-	lnw->iotg.otg.set_vbus = langwell_otg_set_vbus;
-	lnw->iotg.otg.start_srp = langwell_otg_start_srp;
-	lnw->iotg.otg.state = OTG_STATE_UNDEFINED;
-
-	if (otg_set_transceiver(&lnw->iotg.otg)) {
-		dev_dbg(lnw->dev, "can't set transceiver\n");
-		retval = -EBUSY;
-		goto err;
-	}
-
-	reset_otg();
-	init_hsm();
-
-	spin_lock_init(&lnw->lock);
-	spin_lock_init(&lnw->wq_lock);
-	INIT_LIST_HEAD(&active_timers);
-	retval = langwell_otg_init_timers(&lnw->iotg.hsm);
-	if (retval) {
-		dev_dbg(&pdev->dev, "Failed to init timers\n");
-		goto err;
-	}
-
-	init_timer(&lnw->hsm_timer);
-	ATOMIC_INIT_NOTIFIER_HEAD(&lnw->iotg.iotg_notifier);
-
-	lnw->iotg_notifier.notifier_call = langwell_otg_iotg_notify;
-
-	retval = intel_mid_otg_register_notifier(&lnw->iotg,
-						&lnw->iotg_notifier);
-	if (retval) {
-		dev_dbg(lnw->dev, "Failed to register notifier\n");
-		goto err;
-	}
-
-	if (request_irq(pdev->irq, otg_irq, IRQF_SHARED,
-				driver_name, lnw) != 0) {
-		dev_dbg(lnw->dev, "request interrupt %d failed\n", pdev->irq);
-		retval = -EBUSY;
-		goto err;
-	}
-
-	/* enable OTGSC int */
-	val32 = OTGSC_DPIE | OTGSC_BSEIE | OTGSC_BSVIE |
-		OTGSC_ASVIE | OTGSC_AVVIE | OTGSC_IDIE | OTGSC_IDPU;
-	writel(val32, lnw->iotg.base + CI_OTGSC);
-
-	retval = device_create_file(&pdev->dev, &dev_attr_registers);
-	if (retval < 0) {
-		dev_dbg(lnw->dev,
-			"Can't register sysfs attribute: %d\n", retval);
-		goto err;
-	}
-
-	retval = device_create_file(&pdev->dev, &dev_attr_hsm);
-	if (retval < 0) {
-		dev_dbg(lnw->dev, "Can't hsm sysfs attribute: %d\n", retval);
-		goto err;
-	}
-
-	retval = sysfs_create_group(&pdev->dev.kobj, &debug_dev_attr_group);
-	if (retval < 0) {
-		dev_dbg(lnw->dev,
-			"Can't register sysfs attr group: %d\n", retval);
-		goto err;
-	}
-
-	if (lnw->iotg.otg.state == OTG_STATE_A_IDLE)
-		langwell_update_transceiver();
-
-	return 0;
-
-err:
-	if (the_transceiver)
-		langwell_otg_remove(pdev);
-done:
-	return retval;
-}
-
-static void langwell_otg_remove(struct pci_dev *pdev)
-{
-	struct langwell_otg *lnw = the_transceiver;
-
-	if (lnw->qwork) {
-		flush_workqueue(lnw->qwork);
-		destroy_workqueue(lnw->qwork);
-	}
-	intel_mid_otg_unregister_notifier(&lnw->iotg, &lnw->iotg_notifier);
-	langwell_otg_free_timers();
-
-	/* disable OTGSC interrupt as OTGSC doesn't change in reset */
-	writel(0, lnw->iotg.base + CI_OTGSC);
-
-	if (pdev->irq)
-		free_irq(pdev->irq, lnw);
-	if (lnw->usbcfg)
-		iounmap(lnw->usbcfg);
-	if (lnw->cfg_region)
-		release_mem_region(USBCFG_ADDR, USBCFG_LEN);
-	if (lnw->iotg.base)
-		iounmap(lnw->iotg.base);
-	if (lnw->region)
-		release_mem_region(pci_resource_start(pdev, 0),
-				pci_resource_len(pdev, 0));
-
-	otg_set_transceiver(NULL);
-	pci_disable_device(pdev);
-	sysfs_remove_group(&pdev->dev.kobj, &debug_dev_attr_group);
-	device_remove_file(&pdev->dev, &dev_attr_hsm);
-	device_remove_file(&pdev->dev, &dev_attr_registers);
-	kfree(lnw);
-	lnw = NULL;
-}
-
-static void transceiver_suspend(struct pci_dev *pdev)
-{
-	pci_save_state(pdev);
-	pci_set_power_state(pdev, PCI_D3hot);
-	langwell_otg_phy_low_power(1);
-}
-
-static int langwell_otg_suspend(struct pci_dev *pdev, pm_message_t message)
-{
-	struct langwell_otg		*lnw = the_transceiver;
-	struct intel_mid_otg_xceiv	*iotg = &lnw->iotg;
-	int				ret = 0;
-
-	/* Disbale OTG interrupts */
-	langwell_otg_intr(0);
-
-	if (pdev->irq)
-		free_irq(pdev->irq, lnw);
-
-	/* Prevent more otg_work */
-	flush_workqueue(lnw->qwork);
-	destroy_workqueue(lnw->qwork);
-	lnw->qwork = NULL;
-
-	/* start actions */
-	switch (iotg->otg.state) {
-	case OTG_STATE_A_WAIT_VFALL:
-		iotg->otg.state = OTG_STATE_A_IDLE;
-	case OTG_STATE_A_IDLE:
-	case OTG_STATE_B_IDLE:
-	case OTG_STATE_A_VBUS_ERR:
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_WAIT_VRISE:
-		langwell_otg_del_timer(a_wait_vrise_tmr);
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_WAIT_BCON:
-		del_timer_sync(&lnw->hsm_timer);
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev, "host driver has been removed.\n");
-
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_HOST:
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev, "host driver has been removed.\n");
-
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_SUSPEND:
-		langwell_otg_del_timer(a_aidl_bdis_tmr);
-		langwell_otg_HABA(0);
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(lnw->dev, "host driver has been removed.\n");
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_A_PERIPHERAL:
-		del_timer_sync(&lnw->hsm_timer);
-
-		if (lnw->iotg.stop_peripheral)
-			lnw->iotg.stop_peripheral(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev,
-				"client driver has been removed.\n");
-		iotg->hsm.a_srp_det = 0;
-
-		/* Turn off VBus */
-		iotg->otg.set_vbus(&iotg->otg, false);
-		iotg->otg.state = OTG_STATE_A_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_B_HOST:
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev, "host driver has been removed.\n");
-		iotg->hsm.b_bus_req = 0;
-		iotg->otg.state = OTG_STATE_B_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_B_PERIPHERAL:
-		if (lnw->iotg.stop_peripheral)
-			lnw->iotg.stop_peripheral(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev,
-				"client driver has been removed.\n");
-		iotg->otg.state = OTG_STATE_B_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	case OTG_STATE_B_WAIT_ACON:
-		/* delete hsm timer for b_ase0_brst_tmr */
-		del_timer_sync(&lnw->hsm_timer);
-
-		langwell_otg_HAAR(0);
-
-		if (lnw->iotg.stop_host)
-			lnw->iotg.stop_host(&lnw->iotg);
-		else
-			dev_dbg(&pdev->dev, "host driver has been removed.\n");
-		iotg->hsm.b_bus_req = 0;
-		iotg->otg.state = OTG_STATE_B_IDLE;
-		transceiver_suspend(pdev);
-		break;
-	default:
-		dev_dbg(lnw->dev, "error state before suspend\n");
-		break;
-	}
-
-	return ret;
-}
-
-static void transceiver_resume(struct pci_dev *pdev)
-{
-	pci_restore_state(pdev);
-	pci_set_power_state(pdev, PCI_D0);
-}
-
-static int langwell_otg_resume(struct pci_dev *pdev)
-{
-	struct langwell_otg	*lnw = the_transceiver;
-	int			ret = 0;
-
-	transceiver_resume(pdev);
-
-	lnw->qwork = create_singlethread_workqueue("langwell_otg_queue");
-	if (!lnw->qwork) {
-		dev_dbg(&pdev->dev, "cannot create langwell otg workqueuen");
-		ret = -ENOMEM;
-		goto error;
-	}
-
-	if (request_irq(pdev->irq, otg_irq, IRQF_SHARED,
-				driver_name, lnw) != 0) {
-		dev_dbg(&pdev->dev, "request interrupt %d failed\n", pdev->irq);
-		ret = -EBUSY;
-		goto error;
-	}
-
-	/* enable OTG interrupts */
-	langwell_otg_intr(1);
-
-	update_hsm();
-
-	langwell_update_transceiver();
-
-	return ret;
-error:
-	langwell_otg_intr(0);
-	transceiver_suspend(pdev);
-	return ret;
-}
-
-static int __init langwell_otg_init(void)
-{
-	return pci_register_driver(&otg_pci_driver);
-}
-module_init(langwell_otg_init);
-
-static void __exit langwell_otg_cleanup(void)
-{
-	pci_unregister_driver(&otg_pci_driver);
-}
-module_exit(langwell_otg_cleanup);
diff --git a/include/linux/usb/langwell_otg.h b/include/linux/usb/langwell_otg.h
deleted file mode 100644
index 51f17b16d312..000000000000
--- a/include/linux/usb/langwell_otg.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Intel Langwell USB OTG transceiver driver
- * Copyright (C) 2008 - 2010, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- */
-
-#ifndef __LANGWELL_OTG_H
-#define __LANGWELL_OTG_H
-
-#include <linux/usb/intel_mid_otg.h>
-
-#define CI_USBCMD		0x30
-#	define USBCMD_RST		BIT(1)
-#	define USBCMD_RS		BIT(0)
-#define CI_USBSTS		0x34
-#	define USBSTS_SLI		BIT(8)
-#	define USBSTS_URI		BIT(6)
-#	define USBSTS_PCI		BIT(2)
-#define CI_PORTSC1		0x74
-#	define PORTSC_PP		BIT(12)
-#	define PORTSC_LS		(BIT(11) | BIT(10))
-#	define PORTSC_SUSP		BIT(7)
-#	define PORTSC_CCS		BIT(0)
-#define CI_HOSTPC1		0xb4
-#	define HOSTPC1_PHCD		BIT(22)
-#define CI_OTGSC		0xf4
-#	define OTGSC_DPIE		BIT(30)
-#	define OTGSC_1MSE		BIT(29)
-#	define OTGSC_BSEIE		BIT(28)
-#	define OTGSC_BSVIE		BIT(27)
-#	define OTGSC_ASVIE		BIT(26)
-#	define OTGSC_AVVIE		BIT(25)
-#	define OTGSC_IDIE		BIT(24)
-#	define OTGSC_DPIS		BIT(22)
-#	define OTGSC_1MSS		BIT(21)
-#	define OTGSC_BSEIS		BIT(20)
-#	define OTGSC_BSVIS		BIT(19)
-#	define OTGSC_ASVIS		BIT(18)
-#	define OTGSC_AVVIS		BIT(17)
-#	define OTGSC_IDIS		BIT(16)
-#	define OTGSC_DPS		BIT(14)
-#	define OTGSC_1MST		BIT(13)
-#	define OTGSC_BSE		BIT(12)
-#	define OTGSC_BSV		BIT(11)
-#	define OTGSC_ASV		BIT(10)
-#	define OTGSC_AVV		BIT(9)
-#	define OTGSC_ID			BIT(8)
-#	define OTGSC_HABA		BIT(7)
-#	define OTGSC_HADP		BIT(6)
-#	define OTGSC_IDPU		BIT(5)
-#	define OTGSC_DP			BIT(4)
-#	define OTGSC_OT			BIT(3)
-#	define OTGSC_HAAR		BIT(2)
-#	define OTGSC_VC			BIT(1)
-#	define OTGSC_VD			BIT(0)
-#	define OTGSC_INTEN_MASK		(0x7f << 24)
-#	define OTGSC_INT_MASK		(0x5f << 24)
-#	define OTGSC_INTSTS_MASK	(0x7f << 16)
-#define CI_USBMODE		0xf8
-#	define USBMODE_CM		(BIT(1) | BIT(0))
-#	define USBMODE_IDLE		0
-#	define USBMODE_DEVICE		0x2
-#	define USBMODE_HOST		0x3
-#define USBCFG_ADDR			0xff10801c
-#define USBCFG_LEN			4
-#	define USBCFG_VBUSVAL		BIT(14)
-#	define USBCFG_AVALID		BIT(13)
-#	define USBCFG_BVALID		BIT(12)
-#	define USBCFG_SESEND		BIT(11)
-
-#define INTR_DUMMY_MASK (USBSTS_SLI | USBSTS_URI | USBSTS_PCI)
-
-enum langwell_otg_timer_type {
-	TA_WAIT_VRISE_TMR,
-	TA_WAIT_BCON_TMR,
-	TA_AIDL_BDIS_TMR,
-	TB_ASE0_BRST_TMR,
-	TB_SE0_SRP_TMR,
-	TB_SRP_INIT_TMR,
-	TB_SRP_FAIL_TMR,
-	TB_BUS_SUSPEND_TMR
-};
-
-#define TA_WAIT_VRISE	100
-#define TA_WAIT_BCON	30000
-#define TA_AIDL_BDIS	15000
-#define TB_ASE0_BRST	5000
-#define TB_SE0_SRP	2
-#define TB_SRP_INIT	100
-#define TB_SRP_FAIL	5500
-#define TB_BUS_SUSPEND	500
-
-struct langwell_otg_timer {
-	unsigned long expires;	/* Number of count increase to timeout */
-	unsigned long count;	/* Tick counter */
-	void (*function)(unsigned long);	/* Timeout function */
-	unsigned long data;	/* Data passed to function */
-	struct list_head list;
-};
-
-struct langwell_otg {
-	struct intel_mid_otg_xceiv	iotg;
-	struct device			*dev;
-
-	void __iomem			*usbcfg;	/* SCCBUSB config Reg */
-
-	unsigned			region;
-	unsigned			cfg_region;
-
-	struct work_struct		work;
-	struct workqueue_struct		*qwork;
-	struct timer_list		hsm_timer;
-
-	spinlock_t			lock;
-	spinlock_t			wq_lock;
-
-	struct notifier_block		iotg_notifier;
-};
-
-static inline
-struct langwell_otg *mid_xceiv_to_lnw(struct intel_mid_otg_xceiv *iotg)
-{
-	return container_of(iotg, struct langwell_otg, iotg);
-}
-
-#endif /* __LANGWELL_OTG_H__ */
-- 
cgit v1.2.3


From 1a5e29fc2b90daf71a60329c29a1886fd126169a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:02:51 -0800
Subject: kernel-doc: fix new warnings in device.h

Fix new kernel-doc warnings:

Warning(include/linux/device.h:299): No description found for parameter 'name'
Warning(include/linux/device.h:299): No description found for parameter 'subsys'
Warning(include/linux/device.h:299): No description found for parameter 'node'
Warning(include/linux/device.h:299): No description found for parameter 'add_dev'
Warning(include/linux/device.h:299): No description found for parameter 'remove_dev'
Warning(include/linux/device.h:685): No description found for parameter 'id'
Warning(include/linux/device.h:1009): No description found for parameter '__driver'
Warning(include/linux/device.h:1009): No description found for parameter '__register'
Warning(include/linux/device.h:1009): No description found for parameter '__unregister'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/device.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 5b3adb8f9588..b63fb393aa58 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -279,11 +279,11 @@ struct device *driver_find_device(struct device_driver *drv,
 
 /**
  * struct subsys_interface - interfaces to device functions
- * @name        name of the device function
- * @subsystem   subsytem of the devices to attach to
- * @node        the list of functions registered at the subsystem
- * @add         device hookup to device function handler
- * @remove      device hookup to device function handler
+ * @name:       name of the device function
+ * @subsys:     subsytem of the devices to attach to
+ * @node:       the list of functions registered at the subsystem
+ * @add_dev:    device hookup to device function handler
+ * @remove_dev: device hookup to device function handler
  *
  * Simple interfaces attached to a subsystem. Multiple interfaces can
  * attach to a subsystem and its devices. Unlike drivers, they do not
@@ -612,6 +612,7 @@ struct device_dma_parameters {
  * @archdata:	For arch-specific additions.
  * @of_node:	Associated device tree node.
  * @devt:	For creating the sysfs "dev".
+ * @id:		device instance
  * @devres_lock: Spinlock to protect the resource of the device.
  * @devres_head: The resources list of the device.
  * @knode_class: The node used to add the device to the class list.
@@ -1003,6 +1004,10 @@ extern long sysfs_deprecated;
  * Each module may only use this macro once, and calling it replaces
  * module_init() and module_exit().
  *
+ * @__driver: driver name
+ * @__register: register function for this driver type
+ * @__unregister: unregister function for this driver type
+ *
  * Use this macro to construct bus specific macros for registering
  * drivers, and do not use it on its own.
  */
-- 
cgit v1.2.3


From 0fcd97789028e8ec286a4248c20a71eae239ba61 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 21 Jan 2012 11:02:56 -0800
Subject: kernel-doc: fix new warning in usb.h

Fix new kernel-doc warning:

Warning(include/linux/usb.h:1251): No description found for parameter 'num_mapped_sgs'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 27a4e16d2bf1..69d845739bc2 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1073,6 +1073,7 @@ typedef void (*usb_complete_t)(struct urb *);
  *	which the host controller driver should use in preference to the
  *	transfer_buffer.
  * @sg: scatter gather buffer list
+ * @num_mapped_sgs: (internal) number of mapped sg entries
  * @num_sgs: number of entries in the sg list
  * @transfer_buffer_length: How big is transfer_buffer.  The transfer may
  *	be broken up into chunks according to the current maximum packet
-- 
cgit v1.2.3


From b82b9183d4f18f9b8c4bb31f223eb6c79b734eb0 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 24 Jan 2012 05:16:00 +0000
Subject: team: send only changed options/ports via netlink

This patch changes event message behaviour to send only updated records
instead of whole list. This fixes bug on which userspace receives non-actual
data in case multiple events occur in row.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 136 ++++++++++++++++++++++++++++++++----------------
 include/linux/if_team.h |  10 ++++
 2 files changed, 100 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index ed2a862b835d..6b678f38e5ce 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -92,9 +92,9 @@ struct team_option *__team_find_option(struct team *team, const char *opt_name)
 	return NULL;
 }
 
-int team_options_register(struct team *team,
-			  const struct team_option *option,
-			  size_t option_count)
+int __team_options_register(struct team *team,
+			    const struct team_option *option,
+			    size_t option_count)
 {
 	int i;
 	struct team_option **dst_opts;
@@ -116,8 +116,11 @@ int team_options_register(struct team *team,
 		}
 	}
 
-	for (i = 0; i < option_count; i++)
+	for (i = 0; i < option_count; i++) {
+		dst_opts[i]->changed = true;
+		dst_opts[i]->removed = false;
 		list_add_tail(&dst_opts[i]->list, &team->option_list);
+	}
 
 	kfree(dst_opts);
 	return 0;
@@ -130,10 +133,22 @@ rollback:
 	return err;
 }
 
-EXPORT_SYMBOL(team_options_register);
+static void __team_options_mark_removed(struct team *team,
+					const struct team_option *option,
+					size_t option_count)
+{
+	int i;
+
+	for (i = 0; i < option_count; i++, option++) {
+		struct team_option *del_opt;
 
-static void __team_options_change_check(struct team *team,
-					struct team_option *changed_option);
+		del_opt = __team_find_option(team, option->name);
+		if (del_opt) {
+			del_opt->changed = true;
+			del_opt->removed = true;
+		}
+	}
+}
 
 static void __team_options_unregister(struct team *team,
 				      const struct team_option *option,
@@ -152,12 +167,29 @@ static void __team_options_unregister(struct team *team,
 	}
 }
 
+static void __team_options_change_check(struct team *team);
+
+int team_options_register(struct team *team,
+			  const struct team_option *option,
+			  size_t option_count)
+{
+	int err;
+
+	err = __team_options_register(team, option, option_count);
+	if (err)
+		return err;
+	__team_options_change_check(team);
+	return 0;
+}
+EXPORT_SYMBOL(team_options_register);
+
 void team_options_unregister(struct team *team,
 			     const struct team_option *option,
 			     size_t option_count)
 {
+	__team_options_mark_removed(team, option, option_count);
+	__team_options_change_check(team);
 	__team_options_unregister(team, option, option_count);
-	__team_options_change_check(team, NULL);
 }
 EXPORT_SYMBOL(team_options_unregister);
 
@@ -176,7 +208,8 @@ static int team_option_set(struct team *team, struct team_option *option,
 	if (err)
 		return err;
 
-	__team_options_change_check(team, option);
+	option->changed = true;
+	__team_options_change_check(team);
 	return err;
 }
 
@@ -653,6 +686,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev)
 		return -ENOENT;
 	}
 
+	port->removed = true;
 	__team_port_change_check(port, false);
 	team_port_list_del_port(team, port);
 	team_adjust_ops(team);
@@ -1200,10 +1234,9 @@ err_fill:
 	return err;
 }
 
-static int team_nl_fill_options_get_changed(struct sk_buff *skb,
-					    u32 pid, u32 seq, int flags,
-					    struct team *team,
-					    struct team_option *changed_option)
+static int team_nl_fill_options_get(struct sk_buff *skb,
+				    u32 pid, u32 seq, int flags,
+				    struct team *team, bool fillall)
 {
 	struct nlattr *option_list;
 	void *hdr;
@@ -1223,12 +1256,19 @@ static int team_nl_fill_options_get_changed(struct sk_buff *skb,
 		struct nlattr *option_item;
 		long arg;
 
+		/* Include only changed options if fill all mode is not on */
+		if (!fillall && !option->changed)
+			continue;
 		option_item = nla_nest_start(skb, TEAM_ATTR_ITEM_OPTION);
 		if (!option_item)
 			goto nla_put_failure;
 		NLA_PUT_STRING(skb, TEAM_ATTR_OPTION_NAME, option->name);
-		if (option == changed_option)
+		if (option->changed) {
 			NLA_PUT_FLAG(skb, TEAM_ATTR_OPTION_CHANGED);
+			option->changed = false;
+		}
+		if (option->removed)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_OPTION_REMOVED);
 		switch (option->type) {
 		case TEAM_OPTION_TYPE_U32:
 			NLA_PUT_U8(skb, TEAM_ATTR_OPTION_TYPE, NLA_U32);
@@ -1255,13 +1295,13 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int team_nl_fill_options_get(struct sk_buff *skb,
-				    struct genl_info *info, int flags,
-				    struct team *team)
+static int team_nl_fill_options_get_all(struct sk_buff *skb,
+					struct genl_info *info, int flags,
+					struct team *team)
 {
-	return team_nl_fill_options_get_changed(skb, info->snd_pid,
-						info->snd_seq, NLM_F_ACK,
-						team, NULL);
+	return team_nl_fill_options_get(skb, info->snd_pid,
+					info->snd_seq, NLM_F_ACK,
+					team, true);
 }
 
 static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info)
@@ -1273,7 +1313,7 @@ static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info)
 	if (!team)
 		return -EINVAL;
 
-	err = team_nl_send_generic(info, team, team_nl_fill_options_get);
+	err = team_nl_send_generic(info, team, team_nl_fill_options_get_all);
 
 	team_nl_team_put(team);
 
@@ -1365,10 +1405,10 @@ team_put:
 	return err;
 }
 
-static int team_nl_fill_port_list_get_changed(struct sk_buff *skb,
-					      u32 pid, u32 seq, int flags,
-					      struct team *team,
-					      struct team_port *changed_port)
+static int team_nl_fill_port_list_get(struct sk_buff *skb,
+				      u32 pid, u32 seq, int flags,
+				      struct team *team,
+				      bool fillall)
 {
 	struct nlattr *port_list;
 	void *hdr;
@@ -1387,12 +1427,19 @@ static int team_nl_fill_port_list_get_changed(struct sk_buff *skb,
 	list_for_each_entry(port, &team->port_list, list) {
 		struct nlattr *port_item;
 
+		/* Include only changed ports if fill all mode is not on */
+		if (!fillall && !port->changed)
+			continue;
 		port_item = nla_nest_start(skb, TEAM_ATTR_ITEM_PORT);
 		if (!port_item)
 			goto nla_put_failure;
 		NLA_PUT_U32(skb, TEAM_ATTR_PORT_IFINDEX, port->dev->ifindex);
-		if (port == changed_port)
+		if (port->changed) {
 			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_CHANGED);
+			port->changed = false;
+		}
+		if (port->removed)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_REMOVED);
 		if (port->linkup)
 			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_LINKUP);
 		NLA_PUT_U32(skb, TEAM_ATTR_PORT_SPEED, port->speed);
@@ -1408,13 +1455,13 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int team_nl_fill_port_list_get(struct sk_buff *skb,
-				      struct genl_info *info, int flags,
-				      struct team *team)
+static int team_nl_fill_port_list_get_all(struct sk_buff *skb,
+					  struct genl_info *info, int flags,
+					  struct team *team)
 {
-	return team_nl_fill_port_list_get_changed(skb, info->snd_pid,
-						  info->snd_seq, NLM_F_ACK,
-						  team, NULL);
+	return team_nl_fill_port_list_get(skb, info->snd_pid,
+					  info->snd_seq, NLM_F_ACK,
+					  team, true);
 }
 
 static int team_nl_cmd_port_list_get(struct sk_buff *skb,
@@ -1427,7 +1474,7 @@ static int team_nl_cmd_port_list_get(struct sk_buff *skb,
 	if (!team)
 		return -EINVAL;
 
-	err = team_nl_send_generic(info, team, team_nl_fill_port_list_get);
+	err = team_nl_send_generic(info, team, team_nl_fill_port_list_get_all);
 
 	team_nl_team_put(team);
 
@@ -1464,8 +1511,7 @@ static struct genl_multicast_group team_change_event_mcgrp = {
 	.name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME,
 };
 
-static int team_nl_send_event_options_get(struct team *team,
-					  struct team_option *changed_option)
+static int team_nl_send_event_options_get(struct team *team)
 {
 	struct sk_buff *skb;
 	int err;
@@ -1475,8 +1521,7 @@ static int team_nl_send_event_options_get(struct team *team,
 	if (!skb)
 		return -ENOMEM;
 
-	err = team_nl_fill_options_get_changed(skb, 0, 0, 0, team,
-					       changed_option);
+	err = team_nl_fill_options_get(skb, 0, 0, 0, team, false);
 	if (err < 0)
 		goto err_fill;
 
@@ -1489,18 +1534,17 @@ err_fill:
 	return err;
 }
 
-static int team_nl_send_event_port_list_get(struct team_port *port)
+static int team_nl_send_event_port_list_get(struct team *team)
 {
 	struct sk_buff *skb;
 	int err;
-	struct net *net = dev_net(port->team->dev);
+	struct net *net = dev_net(team->dev);
 
 	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
-	err = team_nl_fill_port_list_get_changed(skb, 0, 0, 0,
-						 port->team, port);
+	err = team_nl_fill_port_list_get(skb, 0, 0, 0, team, false);
 	if (err < 0)
 		goto err_fill;
 
@@ -1544,12 +1588,11 @@ static void team_nl_fini(void)
  * Change checkers
  ******************/
 
-static void __team_options_change_check(struct team *team,
-					struct team_option *changed_option)
+static void __team_options_change_check(struct team *team)
 {
 	int err;
 
-	err = team_nl_send_event_options_get(team, changed_option);
+	err = team_nl_send_event_options_get(team);
 	if (err)
 		netdev_warn(team->dev, "Failed to send options change via netlink\n");
 }
@@ -1559,9 +1602,10 @@ static void __team_port_change_check(struct team_port *port, bool linkup)
 {
 	int err;
 
-	if (port->linkup == linkup)
+	if (!port->removed && port->linkup == linkup)
 		return;
 
+	port->changed = true;
 	port->linkup = linkup;
 	if (linkup) {
 		struct ethtool_cmd ecmd;
@@ -1577,7 +1621,7 @@ static void __team_port_change_check(struct team_port *port, bool linkup)
 	port->duplex = 0;
 
 send_event:
-	err = team_nl_send_event_port_list_get(port);
+	err = team_nl_send_event_port_list_get(port->team);
 	if (err)
 		netdev_warn(port->team->dev, "Failed to send port change of device %s via netlink\n",
 			    port->dev->name);
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 828181fbad5d..58404b0c5010 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -46,6 +46,10 @@ struct team_port {
 	u32 speed;
 	u8 duplex;
 
+	/* Custom gennetlink interface related flags */
+	bool changed;
+	bool removed;
+
 	struct rcu_head rcu;
 };
 
@@ -72,6 +76,10 @@ struct team_option {
 	enum team_option_type type;
 	int (*getter)(struct team *team, void *arg);
 	int (*setter)(struct team *team, void *arg);
+
+	/* Custom gennetlink interface related flags */
+	bool changed;
+	bool removed;
 };
 
 struct team_mode {
@@ -207,6 +215,7 @@ enum {
 	TEAM_ATTR_OPTION_CHANGED,	/* flag */
 	TEAM_ATTR_OPTION_TYPE,		/* u8 */
 	TEAM_ATTR_OPTION_DATA,		/* dynamic */
+	TEAM_ATTR_OPTION_REMOVED,	/* flag */
 
 	__TEAM_ATTR_OPTION_MAX,
 	TEAM_ATTR_OPTION_MAX = __TEAM_ATTR_OPTION_MAX - 1,
@@ -227,6 +236,7 @@ enum {
 	TEAM_ATTR_PORT_LINKUP,		/* flag */
 	TEAM_ATTR_PORT_SPEED,		/* u32 */
 	TEAM_ATTR_PORT_DUPLEX,		/* u8 */
+	TEAM_ATTR_PORT_REMOVED,		/* flag */
 
 	__TEAM_ATTR_PORT_MAX,
 	TEAM_ATTR_PORT_MAX = __TEAM_ATTR_PORT_MAX - 1,
-- 
cgit v1.2.3


From e050e3f0a71bf7dc2c148b35caff0234decc8198 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 26 Jan 2012 17:03:19 +0100
Subject: perf: Fix broken interrupt rate throttling

This patch fixes the sampling interrupt throttling mechanism.

It was broken in v3.2. Events were not being unthrottled. The
unthrottling mechanism required that events be checked at each
timer tick.

This patch solves this problem and also separates:

  - unthrottling
  - multiplexing
  - frequency-mode period adjustments

Not all of them need to be executed at each timer tick.

This third version of the patch is based on my original patch +
PeterZ proposal (https://lkml.org/lkml/2012/1/7/87).

At each timer tick, for each context:

  - if the current CPU has throttled events, we unthrottle events

  - if context has frequency-based events, we adjust sampling periods

  - if we have reached the jiffies interval, we multiplex (rotate)

We decoupled rotation (multiplexing) from frequency-mode sampling
period adjustments.  They should not necessarily happen at the same
rate. Multiplexing is subject to jiffies_interval (currently at 1
but could be higher once the tunable is exposed via sysfs).

We have grouped frequency-mode adjustment and unthrottling into the
same routine to minimize code duplication. When throttled while in
frequency mode, we scan the events only once.

We have fixed the threshold enforcement code in __perf_event_overflow().
There was a bug whereby it would allow more than the authorized rate
because an increment of hwc->interrupts was not executed at the right
place.

The patch was tested with low sampling limit (2000) and fixed periods,
frequency mode, overcommitted PMU.

On a 2.1GHz AMD CPU:

 $ cat /proc/sys/kernel/perf_event_max_sample_rate
 2000

We set a rate of 3000 samples/sec (2.1GHz/3000 = 700000):

 $ perf record -e cycles,cycles -c 700000  noploop 10
 $ perf report -D | tail -21

 Aggregated stats:
           TOTAL events:      80086
            MMAP events:         88
            COMM events:          2
            EXIT events:          4
        THROTTLE events:      19996
      UNTHROTTLE events:      19996
          SAMPLE events:      40000

 cycles stats:
           TOTAL events:      40006
            MMAP events:          5
            COMM events:          1
            EXIT events:          4
        THROTTLE events:       9998
      UNTHROTTLE events:       9998
          SAMPLE events:      20000

 cycles stats:
           TOTAL events:      39996
        THROTTLE events:       9998
      UNTHROTTLE events:       9998
          SAMPLE events:      20000

For 10s, the cap is 2x2000x10 = 40000 samples.
We get exactly that: 20000 samples/event.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: <stable@kernel.org> # v3.2+
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120126160319.GA5655@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |   1 +
 kernel/events/core.c       | 104 ++++++++++++++++++++++++++++-----------------
 2 files changed, 67 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 08855613ceb3..abb2776be1ba 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -587,6 +587,7 @@ struct hw_perf_event {
 	u64				sample_period;
 	u64				last_period;
 	local64_t			period_left;
+	u64                             interrupts_seq;
 	u64				interrupts;
 
 	u64				freq_time_stamp;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 32b48c889711..ba36013cfb21 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2300,6 +2300,9 @@ do {					\
 	return div64_u64(dividend, divisor);
 }
 
+static DEFINE_PER_CPU(int, perf_throttled_count);
+static DEFINE_PER_CPU(u64, perf_throttled_seq);
+
 static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -2325,16 +2328,29 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 	}
 }
 
-static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
+/*
+ * combine freq adjustment with unthrottling to avoid two passes over the
+ * events. At the same time, make sure, having freq events does not change
+ * the rate of unthrottling as that would introduce bias.
+ */
+static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
+					   int needs_unthr)
 {
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	u64 interrupts, now;
+	u64 now, period = TICK_NSEC;
 	s64 delta;
 
-	if (!ctx->nr_freq)
+	/*
+	 * only need to iterate over all events iff:
+	 * - context have events in frequency mode (needs freq adjust)
+	 * - there are events to unthrottle on this cpu
+	 */
+	if (!(ctx->nr_freq || needs_unthr))
 		return;
 
+	raw_spin_lock(&ctx->lock);
+
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
 		if (event->state != PERF_EVENT_STATE_ACTIVE)
 			continue;
@@ -2344,13 +2360,8 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 
 		hwc = &event->hw;
 
-		interrupts = hwc->interrupts;
-		hwc->interrupts = 0;
-
-		/*
-		 * unthrottle events on the tick
-		 */
-		if (interrupts == MAX_INTERRUPTS) {
+		if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) {
+			hwc->interrupts = 0;
 			perf_log_throttle(event, 1);
 			event->pmu->start(event, 0);
 		}
@@ -2358,14 +2369,26 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 		if (!event->attr.freq || !event->attr.sample_freq)
 			continue;
 
-		event->pmu->read(event);
+		/*
+		 * stop the event and update event->count
+		 */
+		event->pmu->stop(event, PERF_EF_UPDATE);
+
 		now = local64_read(&event->count);
 		delta = now - hwc->freq_count_stamp;
 		hwc->freq_count_stamp = now;
 
+		/*
+		 * restart the event
+		 * reload only if value has changed
+		 */
 		if (delta > 0)
 			perf_adjust_period(event, period, delta);
+
+		event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
 	}
+
+	raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -2388,16 +2411,13 @@ static void rotate_ctx(struct perf_event_context *ctx)
  */
 static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
-	u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
 	struct perf_event_context *ctx = NULL;
-	int rotate = 0, remove = 1, freq = 0;
+	int rotate = 0, remove = 1;
 
 	if (cpuctx->ctx.nr_events) {
 		remove = 0;
 		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
 			rotate = 1;
-		if (cpuctx->ctx.nr_freq)
-			freq = 1;
 	}
 
 	ctx = cpuctx->task_ctx;
@@ -2405,37 +2425,26 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 		remove = 0;
 		if (ctx->nr_events != ctx->nr_active)
 			rotate = 1;
-		if (ctx->nr_freq)
-			freq = 1;
 	}
 
-	if (!rotate && !freq)
+	if (!rotate)
 		goto done;
 
 	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
 	perf_pmu_disable(cpuctx->ctx.pmu);
 
-	if (freq) {
-		perf_ctx_adjust_freq(&cpuctx->ctx, interval);
-		if (ctx)
-			perf_ctx_adjust_freq(ctx, interval);
-	}
-
-	if (rotate) {
-		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-		if (ctx)
-			ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+	cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+	if (ctx)
+		ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
 
-		rotate_ctx(&cpuctx->ctx);
-		if (ctx)
-			rotate_ctx(ctx);
+	rotate_ctx(&cpuctx->ctx);
+	if (ctx)
+		rotate_ctx(ctx);
 
-		perf_event_sched_in(cpuctx, ctx, current);
-	}
+	perf_event_sched_in(cpuctx, ctx, current);
 
 	perf_pmu_enable(cpuctx->ctx.pmu);
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-
 done:
 	if (remove)
 		list_del_init(&cpuctx->rotation_list);
@@ -2445,10 +2454,22 @@ void perf_event_task_tick(void)
 {
 	struct list_head *head = &__get_cpu_var(rotation_list);
 	struct perf_cpu_context *cpuctx, *tmp;
+	struct perf_event_context *ctx;
+	int throttled;
 
 	WARN_ON(!irqs_disabled());
 
+	__this_cpu_inc(perf_throttled_seq);
+	throttled = __this_cpu_xchg(perf_throttled_count, 0);
+
 	list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
+		ctx = &cpuctx->ctx;
+		perf_adjust_freq_unthr_context(ctx, throttled);
+
+		ctx = cpuctx->task_ctx;
+		if (ctx)
+			perf_adjust_freq_unthr_context(ctx, throttled);
+
 		if (cpuctx->jiffies_interval == 1 ||
 				!(jiffies % cpuctx->jiffies_interval))
 			perf_rotate_context(cpuctx);
@@ -4509,6 +4530,7 @@ static int __perf_event_overflow(struct perf_event *event,
 {
 	int events = atomic_read(&event->event_limit);
 	struct hw_perf_event *hwc = &event->hw;
+	u64 seq;
 	int ret = 0;
 
 	/*
@@ -4518,14 +4540,20 @@ static int __perf_event_overflow(struct perf_event *event,
 	if (unlikely(!is_sampling_event(event)))
 		return 0;
 
-	if (unlikely(hwc->interrupts >= max_samples_per_tick)) {
-		if (throttle) {
+	seq = __this_cpu_read(perf_throttled_seq);
+	if (seq != hwc->interrupts_seq) {
+		hwc->interrupts_seq = seq;
+		hwc->interrupts = 1;
+	} else {
+		hwc->interrupts++;
+		if (unlikely(throttle
+			     && hwc->interrupts >= max_samples_per_tick)) {
+			__this_cpu_inc(perf_throttled_count);
 			hwc->interrupts = MAX_INTERRUPTS;
 			perf_log_throttle(event, 0);
 			ret = 1;
 		}
-	} else
-		hwc->interrupts++;
+	}
 
 	if (event->attr.freq) {
 		u64 now = perf_clock();
-- 
cgit v1.2.3


From 181e9bdef37bfcaa41f3ab6c948a2a0d60a268b5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 29 Jan 2012 20:35:52 +0100
Subject: PM / Hibernate: Fix s2disk regression related to freezing workqueues

Commit 2aede851ddf08666f68ffc17be446420e9d2a056

  PM / Hibernate: Freeze kernel threads after preallocating memory

introduced a mechanism by which kernel threads were frozen after
the preallocation of hibernate image memory to avoid problems with
frozen kernel threads not responding to memory freeing requests.
However, it overlooked the s2disk code path in which the
SNAPSHOT_CREATE_IMAGE ioctl was run directly after SNAPSHOT_FREE,
which caused freeze_workqueues_begin() to BUG(), because it saw
that worqueues had been already frozen.

Although in principle this issue might be addressed by removing
the relevant BUG_ON() from freeze_workqueues_begin(), that would
reintroduce the very problem that commit 2aede851ddf08666f68ffc17be4
attempted to avoid into that particular code path.  For this reason,
to fix the issue at hand, introduce thaw_kernel_threads() and make
the SNAPSHOT_FREE ioctl execute it.

Special thanks to Srivatsa S. Bhat for detailed analysis of the
problem.

Reported-and-tested-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: stable@kernel.org
---
 include/linux/freezer.h |  2 ++
 kernel/power/process.c  | 19 +++++++++++++++++++
 kernel/power/user.c     |  9 +++++++++
 3 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 0ab54e16a91f..d09af4b67cf1 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -39,6 +39,7 @@ extern bool __refrigerator(bool check_kthr_stop);
 extern int freeze_processes(void);
 extern int freeze_kernel_threads(void);
 extern void thaw_processes(void);
+extern void thaw_kernel_threads(void);
 
 static inline bool try_to_freeze(void)
 {
@@ -174,6 +175,7 @@ static inline bool __refrigerator(bool check_kthr_stop) { return false; }
 static inline int freeze_processes(void) { return -ENOSYS; }
 static inline int freeze_kernel_threads(void) { return -ENOSYS; }
 static inline void thaw_processes(void) {}
+static inline void thaw_kernel_threads(void) {}
 
 static inline bool try_to_freeze(void) { return false; }
 
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 77274c9ba2f1..eeca00311f39 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -188,3 +188,22 @@ void thaw_processes(void)
 	printk("done.\n");
 }
 
+void thaw_kernel_threads(void)
+{
+	struct task_struct *g, *p;
+
+	pm_nosig_freezing = false;
+	printk("Restarting kernel threads ... ");
+
+	thaw_workqueues();
+
+	read_lock(&tasklist_lock);
+	do_each_thread(g, p) {
+		if (p->flags & (PF_KTHREAD | PF_WQ_WORKER))
+			__thaw_task(p);
+	} while_each_thread(g, p);
+	read_unlock(&tasklist_lock);
+
+	schedule();
+	printk("done.\n");
+}
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 6b1ab7a88522..e5a21a857302 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -274,6 +274,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		swsusp_free();
 		memset(&data->handle, 0, sizeof(struct snapshot_handle));
 		data->ready = 0;
+		/*
+		 * It is necessary to thaw kernel threads here, because
+		 * SNAPSHOT_CREATE_IMAGE may be invoked directly after
+		 * SNAPSHOT_FREE.  In that case, if kernel threads were not
+		 * thawed, the preallocation of memory carried out by
+		 * hibernation_snapshot() might run into problems (i.e. it
+		 * might fail or even deadlock).
+		 */
+		thaw_kernel_threads();
 		break;
 
 	case SNAPSHOT_PREF_IMAGE_SIZE:
-- 
cgit v1.2.3


From 1a30871fe635d3e92972e6b93e39ff65bb57e52d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Mon, 16 Jan 2012 11:07:16 +0200
Subject: mtd: fix MTD suspend

Commits 3fe4bae88460869a8e553397cd9057a4ee7ca341 and
079c985e7a6f4ce60f931cebfdd5ee3c3 broke MTD suspend in 2 ways:

1. When the '->suspend' method is not present, we return -EOPNOTSUPP, but
   the callers of 'mtd_suspend()' expects 0 instead.
2. Checking of the 'mtd' parameter against NULL has been incorrectly removed
   in 'mtd_cls_suspend()'.

This patch fixes the breakages. This has been found, analyzed, reported
and tested by Rafael J. Wysocki <rjw@sisk.pl>.

Note, this patch is not needed in the stable tree because it causes a
regression introduced during the v3.3 merge window.

Reported-by: Rafael J. Wysocki <rjw@sisk.pl>
Tested-by: Rafael J. Wysocki <rjw@sisk.pl>
Tested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdcore.c   | 2 +-
 include/linux/mtd/mtd.h | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 6ae9ca01388b..9a9ce71a71fc 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -119,7 +119,7 @@ static int mtd_cls_suspend(struct device *dev, pm_message_t state)
 {
 	struct mtd_info *mtd = dev_get_drvdata(dev);
 
-	return mtd_suspend(mtd);
+	return mtd ? mtd_suspend(mtd) : 0;
 }
 
 static int mtd_cls_resume(struct device *dev)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 1a81fde8f333..d8c7aad7331c 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -427,9 +427,7 @@ static inline int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 
 static inline int mtd_suspend(struct mtd_info *mtd)
 {
-	if (!mtd->suspend)
-		return -EOPNOTSUPP;
-	return mtd->suspend(mtd);
+	return mtd->suspend ? mtd->suspend(mtd) : 0;
 }
 
 static inline void mtd_resume(struct mtd_info *mtd)
-- 
cgit v1.2.3


From 3310225dfc71a35a2cc9340c15c0e08b14b3c754 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Mon, 9 Jan 2012 11:53:50 -0600
Subject: lib: proportion: lower PROP_MAX_SHIFT to 32 on 64-bit kernel

PROP_MAX_SHIFT should be set to <=32 on 64-bit box. This fixes two bugs
in the below lines of bdi_dirty_limit():

	bdi_dirty *= numerator;
	do_div(bdi_dirty, denominator);

1) divide error: do_div() only uses the lower 32 bit of the denominator,
   which may trimmed to be 0 when PROP_MAX_SHIFT > 32.

2) overflow: (bdi_dirty * numerator) could easily overflow if numerator
   used up to 48 bits, leaving only 16 bits to bdi_dirty

Cc: <stable@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reported-by: Ilya Tumaykin <librarian_rus@yahoo.com>
Tested-by: Ilya Tumaykin <librarian_rus@yahoo.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/proportions.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/proportions.h b/include/linux/proportions.h
index ef35bb73f69b..26a8a4ed9b07 100644
--- a/include/linux/proportions.h
+++ b/include/linux/proportions.h
@@ -81,7 +81,11 @@ void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
  * Limit the time part in order to ensure there are some bits left for the
  * cycle counter and fraction multiply.
  */
+#if BITS_PER_LONG == 32
 #define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
+#else
+#define PROP_MAX_SHIFT (BITS_PER_LONG/2)
+#endif
 
 #define PROP_FRAC_SHIFT		(BITS_PER_LONG - PROP_MAX_SHIFT - 1)
 #define PROP_FRAC_BASE		(1UL << PROP_FRAC_SHIFT)
-- 
cgit v1.2.3


From 3cccd1543ab623a5065335bf08350e06ffc788ab Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Date: Thu, 26 Jan 2012 19:13:16 +0200
Subject: lib/mpi: replaced MPI_NULL with normal NULL

MPI_NULL is replaced with normal NULL.

Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Reviewed-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/mpi.h | 2 --
 lib/mpi/mpicoder.c  | 8 ++++----
 lib/mpi/mpiutil.c   | 2 +-
 3 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mpi.h b/include/linux/mpi.h
index 06f88994ccaa..d02cca6cc8ce 100644
--- a/include/linux/mpi.h
+++ b/include/linux/mpi.h
@@ -57,8 +57,6 @@ struct gcry_mpi {
 
 typedef struct gcry_mpi *MPI;
 
-#define MPI_NULL NULL
-
 #define mpi_get_nlimbs(a)     ((a)->nlimbs)
 #define mpi_is_neg(a)	      ((a)->sign)
 
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 6116fc4990da..d7684aa7f65c 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -34,7 +34,7 @@ MPI do_encode_md(const void *sha_buffer, unsigned nbits)
 	uint8_t *frame, *fr_pt;
 	int i = 0, n;
 	size_t asnlen = DIM(asn);
-	MPI a = MPI_NULL;
+	MPI a = NULL;
 
 	if (SHA1_DIGEST_LENGTH + asnlen + 4 > nframe)
 		pr_info("MPI: can't encode a %d bit MD into a %d bits frame\n",
@@ -48,7 +48,7 @@ MPI do_encode_md(const void *sha_buffer, unsigned nbits)
 	 */
 	frame = kmalloc(nframe, GFP_KERNEL);
 	if (!frame)
-		return MPI_NULL;
+		return NULL;
 	n = 0;
 	frame[n++] = 0;
 	frame[n++] = 1;		/* block type */
@@ -92,7 +92,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
 	int i, j;
 	unsigned nbits, nbytes, nlimbs, nread = 0;
 	mpi_limb_t a;
-	MPI val = MPI_NULL;
+	MPI val = NULL;
 
 	if (*ret_nread < 2)
 		goto leave;
@@ -109,7 +109,7 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
 	nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
 	val = mpi_alloc(nlimbs);
 	if (!val)
-		return MPI_NULL;
+		return NULL;
 	i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
 	i %= BYTES_PER_MPI_LIMB;
 	val->nbits = nbits;
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
index eefc55d6b7f5..6bfc41f62b8f 100644
--- a/lib/mpi/mpiutil.c
+++ b/lib/mpi/mpiutil.c
@@ -135,7 +135,7 @@ int mpi_copy(MPI *copied, const MPI a)
 	size_t i;
 	MPI b;
 
-	*copied = MPI_NULL;
+	*copied = NULL;
 
 	if (a) {
 		b = mpi_alloc(a->nlimbs);
-- 
cgit v1.2.3


From e9c8d7a03e69093e4c33c5056a45c1233a42e8a4 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 18 Jan 2012 10:14:25 +0100
Subject: dma: sh_dma: not all SH DMAC implementations support MEMCPY

Add a flag to allow platforms to specify, whether a DMAC instance supports
the MEMCPY operation. To avoid regressions, preserve the current default.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Vinod Koul <vinod.koul@linux.intel.com>
---
 drivers/dma/shdma.c    | 3 ++-
 include/linux/sh_dma.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 54043cd831c8..812fd76e9c18 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -1262,7 +1262,8 @@ static int __init sh_dmae_probe(struct platform_device *pdev)
 
 	INIT_LIST_HEAD(&shdev->common.channels);
 
-	dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
+	if (!pdata->slave_only)
+		dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
 	if (pdata->slave && pdata->slave_num)
 		dma_cap_set(DMA_SLAVE, shdev->common.cap_mask);
 
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
index 8cd7fe59cf1a..425450b980b8 100644
--- a/include/linux/sh_dma.h
+++ b/include/linux/sh_dma.h
@@ -70,6 +70,7 @@ struct sh_dmae_pdata {
 	unsigned int needs_tend_set:1;
 	unsigned int no_dmars:1;
 	unsigned int chclr_present:1;
+	unsigned int slave_only:1;
 };
 
 /* DMA register */
-- 
cgit v1.2.3


From b18db3d91234c03ad080d317878c7c77672ba326 Mon Sep 17 00:00:00 2001
From: Heiko Stübner <heiko@sntech.de>
Date: Wed, 1 Feb 2012 09:12:24 -0800
Subject: Input: gpio_keys - fix struct device declared inside parameter list

A struct device parameter is used in the enable and disable callbacks to
distinguish between different gpio_keys devices.

Platforms that don't use these callbacks may not include struct device
at all, as seen on arch/arm/mach-s3c2410/mach-n30.c

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/gpio_keys.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h
index b5ca4b2c08ec..004ff33ab38e 100644
--- a/include/linux/gpio_keys.h
+++ b/include/linux/gpio_keys.h
@@ -1,6 +1,8 @@
 #ifndef _GPIO_KEYS_H
 #define _GPIO_KEYS_H
 
+struct device;
+
 struct gpio_keys_button {
 	/* Configuration parameters */
 	unsigned int code;	/* input event code (KEY_*, SW_*) */
-- 
cgit v1.2.3


From 7d731019218e49a9811f6d0adec4b1cfcb752bed Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Wed, 1 Feb 2012 11:10:24 -0800
Subject: mtd: fix merge conflict resolution breakage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes merge conflict resolution breakage introduced by merge
d3712b9dfcf4 ("Merge tag 'for-linus' of git://github.com/prasad-joshi/logfs_upstream").

The commit changed 'mtd_can_have_bb()' function and made it always
return zero, which is incorrect.  Instead, we need it to return whether
the underlying flash device can have bad eraseblocks or not.  UBI needs
this information because it affects how it handles the underlying flash.
E.g., if the underlying flash is NOR, it cannot have bad blocks and any
write or erase error is fatal, and all we can do is to switch to R/O
mode.  We do not need to reserve a pool of good eraseblocks for bad
eraseblocks handling, and so on.

This patch also removes 'mtd_can_have_bb()' invocations from Logfs to
ensure correct Logfs behavior.

I've tested that with this patch UBI works on top of NOR and NAND
flashes emulated by mtdram and nandsim correspondingly.

This patch is based on patch from Linus Torvalds.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Acked-by: Jörn Engel <joern@logfs.org>
Acked-by: Prasad Joshi <prasadjoshi.linux@gmail.com>
Acked-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/logfs/dev_mtd.c      | 6 ------
 include/linux/mtd/mtd.h | 2 +-
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index e97404d611e0..9c501449450d 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -152,9 +152,6 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs)
 	filler_t *filler = logfs_mtd_readpage;
 	struct mtd_info *mtd = super->s_mtd;
 
-	if (!mtd_can_have_bb(mtd))
-		return NULL;
-
 	*ofs = 0;
 	while (mtd_block_isbad(mtd, *ofs)) {
 		*ofs += mtd->erasesize;
@@ -172,9 +169,6 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs)
 	filler_t *filler = logfs_mtd_readpage;
 	struct mtd_info *mtd = super->s_mtd;
 
-	if (!mtd_can_have_bb(mtd))
-		return NULL;
-
 	*ofs = mtd->size - mtd->erasesize;
 	while (mtd_block_isbad(mtd, *ofs)) {
 		*ofs -= mtd->erasesize;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 221295208fd0..887ebe318c75 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -489,7 +489,7 @@ static inline int mtd_has_oob(const struct mtd_info *mtd)
 
 static inline int mtd_can_have_bb(const struct mtd_info *mtd)
 {
-	return 0;
+	return !!mtd->block_isbad;
 }
 
 	/* Kernel-side ioctl definitions */
-- 
cgit v1.2.3


From 504b61630ab65296b6c9113cce834574e8cc01de Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 31 Jan 2012 16:43:50 -0800
Subject: usb: ch9.h: usb_endpoint_maxp() uses __le16_to_cpu()

The usb/ch9.h will be installed to /usr/include/linux,
and be used from user space.
But le16_to_cpu() is only defined for kernel code.
Without this patch, user space compile will be broken.
Special thanks to Stefan Becker

Reported-by: Stefan Becker <chemobejk@gmail.com>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/ch9.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index 61b29057b054..3b6f628880f8 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -589,7 +589,7 @@ static inline int usb_endpoint_is_isoc_out(
  */
 static inline int usb_endpoint_maxp(const struct usb_endpoint_descriptor *epd)
 {
-	return le16_to_cpu(epd->wMaxPacketSize);
+	return __le16_to_cpu(epd->wMaxPacketSize);
 }
 
 /*-------------------------------------------------------------------------*/
-- 
cgit v1.2.3


From c31c151b1c4a29da4dc92212aa8648fb4f8557b9 Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Thu, 2 Feb 2012 07:18:00 +0000
Subject: net/hyperv: Fix the page buffer when an RNDIS message goes beyond
 page boundary

There is a possible data corruption if an RNDIS message goes beyond page
boundary in the sending code path. This patch fixes the problem.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c   |  8 ++++----
 drivers/net/hyperv/rndis_filter.c | 13 +++++++++++++
 include/linux/hyperv.h            |  2 +-
 3 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 69193fcb7648..466c58a7353d 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -151,10 +151,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 	int ret;
 	unsigned int i, num_pages, npg_data;
 
-	/* Add multipage for skb->data and additional one for RNDIS */
+	/* Add multipages for skb->data and additional 2 for RNDIS */
 	npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1)
 		>> PAGE_SHIFT) - ((unsigned long)skb->data >> PAGE_SHIFT) + 1;
-	num_pages = skb_shinfo(skb)->nr_frags + npg_data + 1;
+	num_pages = skb_shinfo(skb)->nr_frags + npg_data + 2;
 
 	/* Allocate a netvsc packet based on # of frags. */
 	packet = kzalloc(sizeof(struct hv_netvsc_packet) +
@@ -173,8 +173,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 				sizeof(struct hv_netvsc_packet) +
 				    (num_pages * sizeof(struct hv_page_buffer));
 
-	/* Setup the rndis header */
-	packet->page_buf_cnt = num_pages;
+	/* If the rndis msg goes beyond 1 page, we will add 1 later */
+	packet->page_buf_cnt = num_pages - 1;
 
 	/* Initialize it from the skb */
 	packet->total_data_buflen = skb->len;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index dc2e3849573b..133b7fbf8595 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -797,6 +797,19 @@ int rndis_filter_send(struct hv_device *dev,
 			(unsigned long)rndisMessage & (PAGE_SIZE-1);
 	pkt->page_buf[0].len = rndisMessageSize;
 
+	/* Add one page_buf if the rndis msg goes beyond page boundary */
+	if (pkt->page_buf[0].offset + rndisMessageSize > PAGE_SIZE) {
+		int i;
+		for (i = pkt->page_buf_cnt; i > 1; i--)
+			pkt->page_buf[i] = pkt->page_buf[i-1];
+		pkt->page_buf_cnt++;
+		pkt->page_buf[0].len = PAGE_SIZE - pkt->page_buf[0].offset;
+		pkt->page_buf[1].pfn = virt_to_phys((void *)((ulong)
+			rndisMessage + pkt->page_buf[0].len)) >> PAGE_SHIFT;
+		pkt->page_buf[1].offset = 0;
+		pkt->page_buf[1].len = rndisMessageSize - pkt->page_buf[0].len;
+	}
+
 	/* Save the packet send completion and context */
 	filterPacket->completion = pkt->completion.send.send_completion;
 	filterPacket->completion_ctx =
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 62b908e0e591..0ae065a5fcb2 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -35,7 +35,7 @@
 #include <linux/mod_devicetable.h>
 
 
-#define MAX_PAGE_BUFFER_COUNT				18
+#define MAX_PAGE_BUFFER_COUNT				19
 #define MAX_MULTIPAGE_BUFFER_COUNT			32 /* 128K */
 
 #pragma pack(push, 1)
-- 
cgit v1.2.3


From 8cdb878dcb359fd1137e9abdee9322f5e9bcfdf8 Mon Sep 17 00:00:00 2001
From: Christopher Yeoh <cyeoh@au1.ibm.com>
Date: Thu, 2 Feb 2012 11:34:09 +1030
Subject: Fix race in process_vm_rw_core

This fixes the race in process_vm_core found by Oleg (see

  http://article.gmane.org/gmane.linux.kernel/1235667/

for details).

This has been updated since I last sent it as the creation of the new
mm_access() function did almost exactly the same thing as parts of the
previous version of this patch did.

In order to use mm_access() even when /proc isn't enabled, we move it to
kernel/fork.c where other related process mm access functions already
are.

Signed-off-by: Chris Yeoh <yeohc@au1.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c         | 20 --------------------
 include/linux/sched.h  |  6 ++++++
 kernel/fork.c          | 20 ++++++++++++++++++++
 mm/process_vm_access.c | 23 +++++++++--------------
 4 files changed, 35 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index d9512bd03e6c..d4548dd49b02 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -198,26 +198,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
 	return result;
 }
 
-static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
-{
-	struct mm_struct *mm;
-	int err;
-
-	err =  mutex_lock_killable(&task->signal->cred_guard_mutex);
-	if (err)
-		return ERR_PTR(err);
-
-	mm = get_task_mm(task);
-	if (mm && mm != current->mm &&
-			!ptrace_may_access(task, mode)) {
-		mmput(mm);
-		mm = ERR_PTR(-EACCES);
-	}
-	mutex_unlock(&task->signal->cred_guard_mutex);
-
-	return mm;
-}
-
 struct mm_struct *mm_for_maps(struct task_struct *task)
 {
 	return mm_access(task, PTRACE_MODE_READ);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2234985a5e65..7d379a6bfd88 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2259,6 +2259,12 @@ static inline void mmdrop(struct mm_struct * mm)
 extern void mmput(struct mm_struct *);
 /* Grab a reference to a task's mm, if it is not already going away */
 extern struct mm_struct *get_task_mm(struct task_struct *task);
+/*
+ * Grab a reference to a task's mm, if it is not already going away
+ * and ptrace_may_access with the mode parameter passed to it
+ * succeeds.
+ */
+extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
 /* Remove the current tasks stale references to the old mm_struct */
 extern void mm_release(struct task_struct *, struct mm_struct *);
 /* Allocate a new mm structure and copy contents from tsk->mm */
diff --git a/kernel/fork.c b/kernel/fork.c
index 051f090d40c1..1b2ef3c23ae4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -647,6 +647,26 @@ struct mm_struct *get_task_mm(struct task_struct *task)
 }
 EXPORT_SYMBOL_GPL(get_task_mm);
 
+struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
+{
+	struct mm_struct *mm;
+	int err;
+
+	err =  mutex_lock_killable(&task->signal->cred_guard_mutex);
+	if (err)
+		return ERR_PTR(err);
+
+	mm = get_task_mm(task);
+	if (mm && mm != current->mm &&
+			!ptrace_may_access(task, mode)) {
+		mmput(mm);
+		mm = ERR_PTR(-EACCES);
+	}
+	mutex_unlock(&task->signal->cred_guard_mutex);
+
+	return mm;
+}
+
 /* Please note the differences between mmput and mm_release.
  * mmput is called whenever we stop holding onto a mm_struct,
  * error success whatever.
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index e920aa3ce104..c20ff48994c2 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -298,23 +298,18 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
 		goto free_proc_pages;
 	}
 
-	task_lock(task);
-	if (__ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
-		task_unlock(task);
-		rc = -EPERM;
-		goto put_task_struct;
-	}
-	mm = task->mm;
-
-	if (!mm || (task->flags & PF_KTHREAD)) {
-		task_unlock(task);
-		rc = -EINVAL;
+	mm = mm_access(task, PTRACE_MODE_ATTACH);
+	if (!mm || IS_ERR(mm)) {
+		rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+		/*
+		 * Explicitly map EACCES to EPERM as EPERM is a more a
+		 * appropriate error code for process_vw_readv/writev
+		 */
+		if (rc == -EACCES)
+			rc = -EPERM;
 		goto put_task_struct;
 	}
 
-	atomic_inc(&mm->mm_users);
-	task_unlock(task);
-
 	for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) {
 		rc = process_vm_rw_single_vec(
 			(unsigned long)rvec[i].iov_base, rvec[i].iov_len,
-- 
cgit v1.2.3


From ff05f603c3238010769787f3ba54c48c290ed3e5 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Thu, 2 Feb 2012 15:29:08 -0800
Subject: include/linux/lp8727.h: Remove executable bit

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lp8727.h | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 include/linux/lp8727.h

(limited to 'include/linux')

diff --git a/include/linux/lp8727.h b/include/linux/lp8727.h
old mode 100755
new mode 100644
-- 
cgit v1.2.3


From f8447d6c213273b444c81eaa2449f55510229d4f Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Sat, 14 Jan 2012 20:58:43 +0100
Subject: mfd: Store twl6040-codec mclk configuration

Store the last used mclk configuration for the PLL.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/twl6040-core.c  | 3 +++
 include/linux/mfd/twl6040.h | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl6040-core.c b/drivers/mfd/twl6040-core.c
index dda86293dc9f..c2088f4c4547 100644
--- a/drivers/mfd/twl6040-core.c
+++ b/drivers/mfd/twl6040-core.c
@@ -282,6 +282,7 @@ int twl6040_power(struct twl6040 *twl6040, int on)
 		/* Default PLL configuration after power up */
 		twl6040->pll = TWL6040_SYSCLK_SEL_LPPLL;
 		twl6040->sysclk = 19200000;
+		twl6040->mclk = 32768;
 	} else {
 		/* already powered-down */
 		if (!twl6040->power_count) {
@@ -305,6 +306,7 @@ int twl6040_power(struct twl6040 *twl6040, int on)
 			twl6040_power_down(twl6040);
 		}
 		twl6040->sysclk = 0;
+		twl6040->mclk = 0;
 	}
 
 out:
@@ -421,6 +423,7 @@ int twl6040_set_pll(struct twl6040 *twl6040, int pll_id,
 	}
 
 	twl6040->sysclk = freq_out;
+	twl6040->mclk = freq_in;
 	twl6040->pll = pll_id;
 
 pll_out:
diff --git a/include/linux/mfd/twl6040.h b/include/linux/mfd/twl6040.h
index 2463c2619596..9bc9ac651dad 100644
--- a/include/linux/mfd/twl6040.h
+++ b/include/linux/mfd/twl6040.h
@@ -187,8 +187,10 @@ struct twl6040 {
 	int rev;
 	u8 vibra_ctrl_cache[2];
 
+	/* PLL configuration */
 	int pll;
 	unsigned int sysclk;
+	unsigned int mclk;
 
 	unsigned int irq;
 	unsigned int irq_base;
-- 
cgit v1.2.3


From 331818f1c468a24e581aedcbe52af799366a9dfe Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 3 Feb 2012 18:30:53 -0500
Subject: NFSv4: Fix an Oops in the NFSv4 getacl code

Commit bf118a342f10dafe44b14451a1392c3254629a1f (NFSv4: include bitmap
in nfsv4 get acl data) introduces the 'acl_scratch' page for the case
where we may need to decode multi-page data. However it fails to take
into account the fact that the variable may be NULL (for the case where
we're not doing multi-page decode), and it also attaches it to the
encoding xdr_stream rather than the decoding one.

The immediate result is an Oops in nfs4_xdr_enc_getacl due to the
call to page_address() with a NULL page pointer.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Andy Adamson <andros@netapp.com>
Cc: stable@vger.kernel.org
---
 fs/nfs/nfs4proc.c       | 8 ++++----
 fs/nfs/nfs4xdr.c        | 5 ++++-
 include/linux/nfs_xdr.h | 2 +-
 3 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f0c849c98fe4..d202e04aca94 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3575,8 +3575,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 	}
 	if (npages > 1) {
 		/* for decoding across pages */
-		args.acl_scratch = alloc_page(GFP_KERNEL);
-		if (!args.acl_scratch)
+		res.acl_scratch = alloc_page(GFP_KERNEL);
+		if (!res.acl_scratch)
 			goto out_free;
 	}
 	args.acl_len = npages * PAGE_SIZE;
@@ -3612,8 +3612,8 @@ out_free:
 	for (i = 0; i < npages; i++)
 		if (pages[i])
 			__free_page(pages[i]);
-	if (args.acl_scratch)
-		__free_page(args.acl_scratch);
+	if (res.acl_scratch)
+		__free_page(res.acl_scratch);
 	return ret;
 }
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 95e92e438407..33bd8d0f745d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2522,7 +2522,6 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
 
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
 		args->acl_pages, args->acl_pgbase, args->acl_len);
-	xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE);
 
 	encode_nops(&hdr);
 }
@@ -6032,6 +6031,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	struct compound_hdr hdr;
 	int status;
 
+	if (res->acl_scratch != NULL) {
+		void *p = page_address(res->acl_scratch);
+		xdr_set_scratch_buffer(xdr, p, PAGE_SIZE);
+	}
 	status = decode_compound_hdr(xdr, &hdr);
 	if (status)
 		goto out;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a764cef06b73..d6ba9a12591e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -614,7 +614,6 @@ struct nfs_getaclargs {
 	size_t				acl_len;
 	unsigned int			acl_pgbase;
 	struct page **			acl_pages;
-	struct page *			acl_scratch;
 	struct nfs4_sequence_args 	seq_args;
 };
 
@@ -624,6 +623,7 @@ struct nfs_getaclres {
 	size_t				acl_len;
 	size_t				acl_data_offset;
 	int				acl_flags;
+	struct page *			acl_scratch;
 	struct nfs4_sequence_res	seq_res;
 };
 
-- 
cgit v1.2.3


From d020283dc694c9ec31b410f522252f7a8397e67d Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venki@google.com>
Date: Fri, 3 Feb 2012 22:22:25 +0100
Subject: PM / QoS: CPU C-state breakage with PM Qos change

Looks like change "PM QoS: Move and rename the implementation files"
merged during the 3.2 development cycle made PM QoS depend on
CONFIG_PM which depends on (PM_SLEEP || PM_RUNTIME).

That breaks CPU C-states with kernels not having these CONFIGs, causing CPUs
to spend time in Polling loop idle instead of going into deep C-states,
consuming way way more power. This is with either acpi idle or intel idle
enabled.

Either CONFIG_PM should be enabled with any pm_qos users or
the !CONFIG_PM pm_qos_request() should return sane defaults not to break
the existing users. Here's is the patch for the latter option.

[rjw: Modified the changelog slightly.]

Signed-off-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: stable@vger.kernel.org
---
 include/linux/pm_qos.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index e5bbcbaa6f57..4d99e4e6ef83 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -110,7 +110,19 @@ static inline void pm_qos_remove_request(struct pm_qos_request *req)
 			{ return; }
 
 static inline int pm_qos_request(int pm_qos_class)
-			{ return 0; }
+{
+	switch (pm_qos_class) {
+	case PM_QOS_CPU_DMA_LATENCY:
+		return PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE;
+	case PM_QOS_NETWORK_LATENCY:
+		return PM_QOS_NETWORK_LAT_DEFAULT_VALUE;
+	case PM_QOS_NETWORK_THROUGHPUT:
+		return PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE;
+	default:
+		return PM_QOS_DEFAULT_VALUE;
+	}
+}
+
 static inline int pm_qos_add_notifier(int pm_qos_class,
 				      struct notifier_block *notifier)
 			{ return 0; }
-- 
cgit v1.2.3


From 96e02d1586782eadf051fa3d6bc4132d2447ac2c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Sat, 4 Feb 2012 10:47:10 +0100
Subject: exec: fix use-after-free bug in setup_new_exec()

Setting the task name is done within setup_new_exec() by accessing
bprm->filename. However this happens after flush_old_exec().
This may result in a use after free bug, flush_old_exec() may
"complete" vfork_done, which will wake up the parent which in turn
may free the passed in filename.
To fix this add a new tcomm field in struct linux_binprm which
contains the now early generated task name until it is used.

Fixes this bug on s390:

  Unable to handle kernel pointer dereference at virtual kernel address 0000000039768000
  Process kworker/u:3 (pid: 245, task: 000000003a3dc840, ksp: 0000000039453818)
  Krnl PSW : 0704000180000000 0000000000282e94 (setup_new_exec+0xa0/0x374)
  Call Trace:
  ([<0000000000282e2c>] setup_new_exec+0x38/0x374)
   [<00000000002dd12e>] load_elf_binary+0x402/0x1bf4
   [<0000000000280a42>] search_binary_handler+0x38e/0x5bc
   [<0000000000282b6c>] do_execve_common+0x410/0x514
   [<0000000000282cb6>] do_execve+0x46/0x58
   [<00000000005bce58>] kernel_execve+0x28/0x70
   [<000000000014ba2e>] ____call_usermodehelper+0x102/0x140
   [<00000000005bc8da>] kernel_thread_starter+0x6/0xc
   [<00000000005bc8d4>] kernel_thread_starter+0x0/0xc
  Last Breaking-Event-Address:
   [<00000000002830f0>] setup_new_exec+0x2fc/0x374

  Kernel panic - not syncing: Fatal exception: panic_on_oops

Reported-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c               | 33 +++++++++++++++++----------------
 include/linux/binfmts.h |  3 ++-
 2 files changed, 19 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index aeb135c7ff5c..92ce83a11e90 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1071,6 +1071,21 @@ void set_task_comm(struct task_struct *tsk, char *buf)
 	perf_event_comm(tsk);
 }
 
+static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len)
+{
+	int i, ch;
+
+	/* Copies the binary name from after last slash */
+	for (i = 0; (ch = *(fn++)) != '\0';) {
+		if (ch == '/')
+			i = 0; /* overwrite what we wrote */
+		else
+			if (i < len - 1)
+				tcomm[i++] = ch;
+	}
+	tcomm[i] = '\0';
+}
+
 int flush_old_exec(struct linux_binprm * bprm)
 {
 	int retval;
@@ -1085,6 +1100,7 @@ int flush_old_exec(struct linux_binprm * bprm)
 
 	set_mm_exe_file(bprm->mm, bprm->file);
 
+	filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm));
 	/*
 	 * Release all of the old mmap stuff
 	 */
@@ -1116,10 +1132,6 @@ EXPORT_SYMBOL(would_dump);
 
 void setup_new_exec(struct linux_binprm * bprm)
 {
-	int i, ch;
-	const char *name;
-	char tcomm[sizeof(current->comm)];
-
 	arch_pick_mmap_layout(current->mm);
 
 	/* This is the point of no return */
@@ -1130,18 +1142,7 @@ void setup_new_exec(struct linux_binprm * bprm)
 	else
 		set_dumpable(current->mm, suid_dumpable);
 
-	name = bprm->filename;
-
-	/* Copies the binary name from after last slash */
-	for (i=0; (ch = *(name++)) != '\0';) {
-		if (ch == '/')
-			i = 0; /* overwrite what we wrote */
-		else
-			if (i < (sizeof(tcomm) - 1))
-				tcomm[i++] = ch;
-	}
-	tcomm[i] = '\0';
-	set_task_comm(current, tcomm);
+	set_task_comm(current, bprm->tcomm);
 
 	/* Set the new mm task size. We have to do that late because it may
 	 * depend on TIF_32BIT which is only updated in flush_thread() on
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index fd88a3945aa1..0092102db2de 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -18,7 +18,7 @@ struct pt_regs;
 #define BINPRM_BUF_SIZE 128
 
 #ifdef __KERNEL__
-#include <linux/list.h>
+#include <linux/sched.h>
 
 #define CORENAME_MAX_SIZE 128
 
@@ -58,6 +58,7 @@ struct linux_binprm {
 	unsigned interp_flags;
 	unsigned interp_data;
 	unsigned long loader, exec;
+	char tcomm[TASK_COMM_LEN];
 };
 
 #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
-- 
cgit v1.2.3


From 11a3122f6cf2d988a77eb8883d0fc49cd013a6d5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 7 Feb 2012 07:51:30 +0100
Subject: block: strip out locking optimization in put_io_context()

put_io_context() performed a complex trylock dancing to avoid
deferring ioc release to workqueue.  It was also broken on UP because
trylock was always assumed to succeed which resulted in unbalanced
preemption count.

While there are ways to fix the UP breakage, even the most
pathological microbench (forced ioc allocation and tight fork/exit
loop) fails to show any appreciable performance benefit of the
optimization.  Strip it out.  If there turns out to be workloads which
are affected by this change, simpler optimization from the discussion
thread can be applied later.

Signed-off-by: Tejun Heo <tj@kernel.org>
LKML-Reference: <1328514611.21268.66.camel@sli10-conroe>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c        |  2 +-
 block/blk-core.c          |  2 +-
 block/blk-ioc.c           | 92 ++++++-----------------------------------------
 block/cfq-iosched.c       |  2 +-
 fs/ioprio.c               |  2 +-
 include/linux/blkdev.h    |  3 --
 include/linux/iocontext.h |  5 ++-
 kernel/fork.c             |  2 +-
 8 files changed, 18 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index fa8f26309444..75642a352a8f 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1659,7 +1659,7 @@ static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 		ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
 		if (ioc) {
 			ioc_cgroup_changed(ioc);
-			put_io_context(ioc, NULL);
+			put_io_context(ioc);
 		}
 	}
 }
diff --git a/block/blk-core.c b/block/blk-core.c
index 636702575118..532b3a21b383 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -642,7 +642,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq)
 	if (rq->cmd_flags & REQ_ELVPRIV) {
 		elv_put_request(q, rq);
 		if (rq->elv.icq)
-			put_io_context(rq->elv.icq->ioc, q);
+			put_io_context(rq->elv.icq->ioc);
 	}
 
 	mempool_free(rq, q->rq.rq_pool);
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 7490b6da2453..9884fd7427fe 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -29,21 +29,6 @@ void get_io_context(struct io_context *ioc)
 }
 EXPORT_SYMBOL(get_io_context);
 
-/*
- * Releasing ioc may nest into another put_io_context() leading to nested
- * fast path release.  As the ioc's can't be the same, this is okay but
- * makes lockdep whine.  Keep track of nesting and use it as subclass.
- */
-#ifdef CONFIG_LOCKDEP
-#define ioc_release_depth(q)		((q) ? (q)->ioc_release_depth : 0)
-#define ioc_release_depth_inc(q)	(q)->ioc_release_depth++
-#define ioc_release_depth_dec(q)	(q)->ioc_release_depth--
-#else
-#define ioc_release_depth(q)		0
-#define ioc_release_depth_inc(q)	do { } while (0)
-#define ioc_release_depth_dec(q)	do { } while (0)
-#endif
-
 static void icq_free_icq_rcu(struct rcu_head *head)
 {
 	struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);
@@ -75,11 +60,8 @@ static void ioc_exit_icq(struct io_cq *icq)
 	if (rcu_dereference_raw(ioc->icq_hint) == icq)
 		rcu_assign_pointer(ioc->icq_hint, NULL);
 
-	if (et->ops.elevator_exit_icq_fn) {
-		ioc_release_depth_inc(q);
+	if (et->ops.elevator_exit_icq_fn)
 		et->ops.elevator_exit_icq_fn(icq);
-		ioc_release_depth_dec(q);
-	}
 
 	/*
 	 * @icq->q might have gone away by the time RCU callback runs
@@ -149,81 +131,29 @@ static void ioc_release_fn(struct work_struct *work)
 /**
  * put_io_context - put a reference of io_context
  * @ioc: io_context to put
- * @locked_q: request_queue the caller is holding queue_lock of (hint)
  *
  * Decrement reference count of @ioc and release it if the count reaches
- * zero.  If the caller is holding queue_lock of a queue, it can indicate
- * that with @locked_q.  This is an optimization hint and the caller is
- * allowed to pass in %NULL even when it's holding a queue_lock.
+ * zero.
  */
-void put_io_context(struct io_context *ioc, struct request_queue *locked_q)
+void put_io_context(struct io_context *ioc)
 {
-	struct request_queue *last_q = locked_q;
 	unsigned long flags;
 
 	if (ioc == NULL)
 		return;
 
 	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
-	if (locked_q)
-		lockdep_assert_held(locked_q->queue_lock);
-
-	if (!atomic_long_dec_and_test(&ioc->refcount))
-		return;
 
 	/*
-	 * Destroy @ioc.  This is a bit messy because icq's are chained
-	 * from both ioc and queue, and ioc->lock nests inside queue_lock.
-	 * The inner ioc->lock should be held to walk our icq_list and then
-	 * for each icq the outer matching queue_lock should be grabbed.
-	 * ie. We need to do reverse-order double lock dancing.
-	 *
-	 * Another twist is that we are often called with one of the
-	 * matching queue_locks held as indicated by @locked_q, which
-	 * prevents performing double-lock dance for other queues.
-	 *
-	 * So, we do it in two stages.  The fast path uses the queue_lock
-	 * the caller is holding and, if other queues need to be accessed,
-	 * uses trylock to avoid introducing locking dependency.  This can
-	 * handle most cases, especially if @ioc was performing IO on only
-	 * single device.
-	 *
-	 * If trylock doesn't cut it, we defer to @ioc->release_work which
-	 * can do all the double-locking dancing.
+	 * Releasing ioc requires reverse order double locking and we may
+	 * already be holding a queue_lock.  Do it asynchronously from wq.
 	 */
-	spin_lock_irqsave_nested(&ioc->lock, flags,
-				 ioc_release_depth(locked_q));
-
-	while (!hlist_empty(&ioc->icq_list)) {
-		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
-						struct io_cq, ioc_node);
-		struct request_queue *this_q = icq->q;
-
-		if (this_q != last_q) {
-			if (last_q && last_q != locked_q)
-				spin_unlock(last_q->queue_lock);
-			last_q = NULL;
-
-			/* spin_trylock() always successes in UP case */
-			if (this_q != locked_q &&
-			    !spin_trylock(this_q->queue_lock))
-				break;
-			last_q = this_q;
-			continue;
-		}
-		ioc_exit_icq(icq);
+	if (atomic_long_dec_and_test(&ioc->refcount)) {
+		spin_lock_irqsave(&ioc->lock, flags);
+		if (!hlist_empty(&ioc->icq_list))
+			schedule_work(&ioc->release_work);
+		spin_unlock_irqrestore(&ioc->lock, flags);
 	}
-
-	if (last_q && last_q != locked_q)
-		spin_unlock(last_q->queue_lock);
-
-	spin_unlock_irqrestore(&ioc->lock, flags);
-
-	/* if no icq is left, we're done; otherwise, kick release_work */
-	if (hlist_empty(&ioc->icq_list))
-		kmem_cache_free(iocontext_cachep, ioc);
-	else
-		schedule_work(&ioc->release_work);
 }
 EXPORT_SYMBOL(put_io_context);
 
@@ -238,7 +168,7 @@ void exit_io_context(struct task_struct *task)
 	task_unlock(task);
 
 	atomic_dec(&ioc->nr_tasks);
-	put_io_context(ioc, NULL);
+	put_io_context(ioc);
 }
 
 /**
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index da21c24dbed3..5684df6848bc 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1794,7 +1794,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		cfqd->active_queue = NULL;
 
 	if (cfqd->active_cic) {
-		put_io_context(cfqd->active_cic->icq.ioc, cfqd->queue);
+		put_io_context(cfqd->active_cic->icq.ioc);
 		cfqd->active_cic = NULL;
 	}
 }
diff --git a/fs/ioprio.c b/fs/ioprio.c
index f84b380d65e5..0f1b9515213b 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
 	ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
 	if (ioc) {
 		ioc_ioprio_changed(ioc, ioprio);
-		put_io_context(ioc, NULL);
+		put_io_context(ioc);
 	}
 
 	return err;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6c6a1f008065..606cf339bb56 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -399,9 +399,6 @@ struct request_queue {
 	/* Throttle data */
 	struct throtl_data *td;
 #endif
-#ifdef CONFIG_LOCKDEP
-	int			ioc_release_depth;
-#endif
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 7e1371c4bccf..119773eebe31 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -133,7 +133,7 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
 
 struct task_struct;
 #ifdef CONFIG_BLOCK
-void put_io_context(struct io_context *ioc, struct request_queue *locked_q);
+void put_io_context(struct io_context *ioc);
 void exit_io_context(struct task_struct *task);
 struct io_context *get_task_io_context(struct task_struct *task,
 				       gfp_t gfp_flags, int node);
@@ -141,8 +141,7 @@ void ioc_ioprio_changed(struct io_context *ioc, int ioprio);
 void ioc_cgroup_changed(struct io_context *ioc);
 #else
 struct io_context;
-static inline void put_io_context(struct io_context *ioc,
-				  struct request_queue *locked_q) { }
+static inline void put_io_context(struct io_context *ioc) { }
 static inline void exit_io_context(struct task_struct *task) { }
 #endif
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 051f090d40c1..c574aefa8d1b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -890,7 +890,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 			return -ENOMEM;
 
 		new_ioc->ioprio = ioc->ioprio;
-		put_io_context(new_ioc, NULL);
+		put_io_context(new_ioc);
 	}
 #endif
 	return 0;
-- 
cgit v1.2.3


From 050c8ea80e3e90019d9e981c6a117ef614e882ed Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 8 Feb 2012 09:19:38 +0100
Subject: block: separate out blk_rq_merge_ok() and blk_try_merge() from
 elevator functions

blk_rq_merge_ok() is the elevator-neutral part of merge eligibility
test.  blk_try_merge() determines merge direction and expects the
caller to have tested elv_rq_merge_ok() previously.

elv_rq_merge_ok() now wraps blk_rq_merge_ok() and then calls
elv_iosched_allow_merge().  elv_try_merge() is removed and the two
callers are updated to call elv_rq_merge_ok() explicitly followed by
blk_try_merge().  While at it, make rq_merge_ok() functions return
bool.

This is to prepare for plug merge update and doesn't introduce any
behavior change.

This is based on Jens' patch to skip elevator_allow_merge_fn() from
plug merge.

Signed-off-by: Tejun Heo <tj@kernel.org>
LKML-Reference: <4F16F3CA.90904@kernel.dk>
Original-patch-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c         |  4 ++--
 block/blk-merge.c        | 37 ++++++++++++++++++++++++++++++++
 block/blk.h              |  2 ++
 block/elevator.c         | 55 ++++--------------------------------------------
 include/linux/elevator.h |  3 +--
 5 files changed, 46 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 532b3a21b383..fa697bf691eb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1282,10 +1282,10 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
 
 		(*request_count)++;
 
-		if (rq->q != q)
+		if (rq->q != q || !elv_rq_merge_ok(rq, bio))
 			continue;
 
-		el_ret = elv_try_merge(rq, bio);
+		el_ret = blk_try_merge(rq, bio);
 		if (el_ret == ELEVATOR_BACK_MERGE) {
 			ret = bio_attempt_back_merge(q, rq, bio);
 			if (ret)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index cfcc37cb222b..160035f54882 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -471,3 +471,40 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
 {
 	return attempt_merge(q, rq, next);
 }
+
+bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
+{
+	if (!rq_mergeable(rq))
+		return false;
+
+	/* don't merge file system requests and discard requests */
+	if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
+		return false;
+
+	/* don't merge discard requests and secure discard requests */
+	if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
+		return false;
+
+	/* different data direction or already started, don't merge */
+	if (bio_data_dir(bio) != rq_data_dir(rq))
+		return false;
+
+	/* must be same device and not a special request */
+	if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
+		return false;
+
+	/* only merge integrity protected bio into ditto rq */
+	if (bio_integrity(bio) != blk_integrity_rq(rq))
+		return false;
+
+	return true;
+}
+
+int blk_try_merge(struct request *rq, struct bio *bio)
+{
+	if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_sector)
+		return ELEVATOR_BACK_MERGE;
+	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_sector)
+		return ELEVATOR_FRONT_MERGE;
+	return ELEVATOR_NO_MERGE;
+}
diff --git a/block/blk.h b/block/blk.h
index 7efd772336de..9c12f80882b0 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -137,6 +137,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
 				struct request *next);
 void blk_recalc_rq_segments(struct request *rq);
 void blk_rq_set_mixed_merge(struct request *rq);
+bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
+int blk_try_merge(struct request *rq, struct bio *bio);
 
 void blk_queue_congestion_threshold(struct request_queue *q);
 
diff --git a/block/elevator.c b/block/elevator.c
index 91e18f8af9be..f016855a46b0 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -70,39 +70,9 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
 /*
  * can we safely merge with this request?
  */
-int elv_rq_merge_ok(struct request *rq, struct bio *bio)
+bool elv_rq_merge_ok(struct request *rq, struct bio *bio)
 {
-	if (!rq_mergeable(rq))
-		return 0;
-
-	/*
-	 * Don't merge file system requests and discard requests
-	 */
-	if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
-		return 0;
-
-	/*
-	 * Don't merge discard requests and secure discard requests
-	 */
-	if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
-		return 0;
-
-	/*
-	 * different data direction or already started, don't merge
-	 */
-	if (bio_data_dir(bio) != rq_data_dir(rq))
-		return 0;
-
-	/*
-	 * must be same device and not a special request
-	 */
-	if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
-		return 0;
-
-	/*
-	 * only merge integrity protected bio into ditto rq
-	 */
-	if (bio_integrity(bio) != blk_integrity_rq(rq))
+	if (!blk_rq_merge_ok(rq, bio))
 		return 0;
 
 	if (!elv_iosched_allow_merge(rq, bio))
@@ -112,23 +82,6 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 }
 EXPORT_SYMBOL(elv_rq_merge_ok);
 
-int elv_try_merge(struct request *__rq, struct bio *bio)
-{
-	int ret = ELEVATOR_NO_MERGE;
-
-	/*
-	 * we can merge and sequence is ok, check if it's possible
-	 */
-	if (elv_rq_merge_ok(__rq, bio)) {
-		if (blk_rq_pos(__rq) + blk_rq_sectors(__rq) == bio->bi_sector)
-			ret = ELEVATOR_BACK_MERGE;
-		else if (blk_rq_pos(__rq) - bio_sectors(bio) == bio->bi_sector)
-			ret = ELEVATOR_FRONT_MERGE;
-	}
-
-	return ret;
-}
-
 static struct elevator_type *elevator_find(const char *name)
 {
 	struct elevator_type *e;
@@ -478,8 +431,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 	/*
 	 * First try one-hit cache.
 	 */
-	if (q->last_merge) {
-		ret = elv_try_merge(q->last_merge, bio);
+	if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) {
+		ret = blk_try_merge(q->last_merge, bio);
 		if (ret != ELEVATOR_NO_MERGE) {
 			*req = q->last_merge;
 			return ret;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index c24f3d7fbf1e..d6dfb65c8885 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -122,7 +122,6 @@ extern void elv_dispatch_add_tail(struct request_queue *, struct request *);
 extern void elv_add_request(struct request_queue *, struct request *, int);
 extern void __elv_add_request(struct request_queue *, struct request *, int);
 extern int elv_merge(struct request_queue *, struct request **, struct bio *);
-extern int elv_try_merge(struct request *, struct bio *);
 extern void elv_merge_requests(struct request_queue *, struct request *,
 			       struct request *);
 extern void elv_merged_request(struct request_queue *, struct request *, int);
@@ -155,7 +154,7 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
 extern int elevator_init(struct request_queue *, char *);
 extern void elevator_exit(struct elevator_queue *);
 extern int elevator_change(struct request_queue *, const char *);
-extern int elv_rq_merge_ok(struct request *, struct bio *);
+extern bool elv_rq_merge_ok(struct request *, struct bio *);
 
 /*
  * Helper functions.
-- 
cgit v1.2.3


From 07c2bd37350c9b1af71b35d05f16e300a6602948 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 8 Feb 2012 09:19:42 +0100
Subject: block: don't call elevator callbacks for plug merges

Plug merge calls two elevator callbacks outside queue lock -
elevator_allow_merge_fn() and elevator_bio_merged_fn().  Although
attempt_plug_merge() suggests that elevator is guaranteed to be there
through the existing request on the plug list, nothing prevents plug
merge from calling into dying or initializing elevator.

For regular merges, bypass ensures elvpriv count to reach zero, which
in turn prevents merges as all !ELVPRIV requests get REQ_SOFTBARRIER
from forced back insertion.  Plug merge doesn't check ELVPRIV, and, as
the requests haven't gone through elevator insertion yet, it doesn't
have SOFTBARRIER set allowing merges on a bypassed queue.

This, for example, leads to the following crash during elevator
switch.

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
 IP: [<ffffffff813b34e9>] cfq_allow_merge+0x49/0xa0
 PGD 112cbc067 PUD 115d5c067 PMD 0
 Oops: 0000 [#1] PREEMPT SMP
 CPU 1
 Modules linked in: deadline_iosched

 Pid: 819, comm: dd Not tainted 3.3.0-rc2-work+ #76 Bochs Bochs
 RIP: 0010:[<ffffffff813b34e9>]  [<ffffffff813b34e9>] cfq_allow_merge+0x49/0xa0
 RSP: 0018:ffff8801143a38f8  EFLAGS: 00010297
 RAX: 0000000000000000 RBX: ffff88011817ce28 RCX: ffff880116eb6cc0
 RDX: 0000000000000000 RSI: ffff880118056e20 RDI: ffff8801199512f8
 RBP: ffff8801143a3908 R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000000000001 R11: 0000000000000000 R12: ffff880118195708
 R13: ffff880118052aa0 R14: ffff8801143a3d50 R15: ffff880118195708
 FS:  00007f19f82cb700(0000) GS:ffff88011fc80000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
 CR2: 0000000000000008 CR3: 0000000112c6a000 CR4: 00000000000006e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
 Process dd (pid: 819, threadinfo ffff8801143a2000, task ffff880116eb6cc0)
 Stack:
  ffff88011817ce28 ffff880118195708 ffff8801143a3928 ffffffff81391bba
  ffff88011817ce28 ffff880118195708 ffff8801143a3948 ffffffff81391bf1
  ffff88011817ce28 0000000000000000 ffff8801143a39a8 ffffffff81398e3e
 Call Trace:
  [<ffffffff81391bba>] elv_rq_merge_ok+0x4a/0x60
  [<ffffffff81391bf1>] elv_try_merge+0x21/0x40
  [<ffffffff81398e3e>] blk_queue_bio+0x8e/0x390
  [<ffffffff81396a5a>] generic_make_request+0xca/0x100
  [<ffffffff81396b04>] submit_bio+0x74/0x100
  [<ffffffff811d45c2>] __blockdev_direct_IO+0x1ce2/0x3450
  [<ffffffff811d0dc7>] blkdev_direct_IO+0x57/0x60
  [<ffffffff811460b5>] generic_file_aio_read+0x6d5/0x760
  [<ffffffff811986b2>] do_sync_read+0xe2/0x120
  [<ffffffff81199345>] vfs_read+0xc5/0x180
  [<ffffffff81199501>] sys_read+0x51/0x90
  [<ffffffff81aeac12>] system_call_fastpath+0x16/0x1b

There are multiple ways to fix this including making plug merge check
ELVPRIV; however,

* Calling into elevator outside queue lock is confusing and
  error-prone.

* Requests on plug list aren't known to the elevator.  They aren't on
  the elevator yet, so there's no elevator specific state to update.

* Given the nature of plug merges - collecting bio's for the same
  purpose from the same issuer - elevator specific restrictions aren't
  applicable.

So, simply don't call into elevator methods from plug merge by moving
elv_bio_merged() from bio_attempt_*_merge() to blk_queue_bio(), and
using blk_try_merge() in attempt_plug_merge().

This is based on Jens' patch to skip elevator_allow_merge_fn() from
plug merge.

Note that this makes per-cgroup merged stats skip plug merging.

Signed-off-by: Tejun Heo <tj@kernel.org>
LKML-Reference: <4F16F3CA.90904@kernel.dk>
Original-patch-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c         | 19 +++++++++----------
 block/cfq-iosched.c      | 15 ++++-----------
 include/linux/elevator.h |  6 ------
 3 files changed, 13 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index fa697bf691eb..3a78b00edd71 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1212,7 +1212,6 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
 	drive_stat_acct(req, 0);
-	elv_bio_merged(q, req, bio);
 	return true;
 }
 
@@ -1243,7 +1242,6 @@ static bool bio_attempt_front_merge(struct request_queue *q,
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
 	drive_stat_acct(req, 0);
-	elv_bio_merged(q, req, bio);
 	return true;
 }
 
@@ -1257,13 +1255,12 @@ static bool bio_attempt_front_merge(struct request_queue *q,
  * on %current's plugged list.  Returns %true if merge was successful,
  * otherwise %false.
  *
- * This function is called without @q->queue_lock; however, elevator is
- * accessed iff there already are requests on the plugged list which in
- * turn guarantees validity of the elevator.
- *
- * Note that, on successful merge, elevator operation
- * elevator_bio_merged_fn() will be called without queue lock.  Elevator
- * must be ready for this.
+ * Plugging coalesces IOs from the same issuer for the same purpose without
+ * going through @q->queue_lock.  As such it's more of an issuing mechanism
+ * than scheduling, and the request, while may have elvpriv data, is not
+ * added on the elevator at this point.  In addition, we don't have
+ * reliable access to the elevator outside queue lock.  Only check basic
+ * merging parameters without querying the elevator.
  */
 static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
 			       unsigned int *request_count)
@@ -1282,7 +1279,7 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
 
 		(*request_count)++;
 
-		if (rq->q != q || !elv_rq_merge_ok(rq, bio))
+		if (rq->q != q || !blk_rq_merge_ok(rq, bio))
 			continue;
 
 		el_ret = blk_try_merge(rq, bio);
@@ -1347,12 +1344,14 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
 	el_ret = elv_merge(q, &req, bio);
 	if (el_ret == ELEVATOR_BACK_MERGE) {
 		if (bio_attempt_back_merge(q, req, bio)) {
+			elv_bio_merged(q, req, bio);
 			if (!attempt_back_merge(q, req))
 				elv_merged_request(q, req, el_ret);
 			goto out_unlock;
 		}
 	} else if (el_ret == ELEVATOR_FRONT_MERGE) {
 		if (bio_attempt_front_merge(q, req, bio)) {
+			elv_bio_merged(q, req, bio);
 			if (!attempt_front_merge(q, req))
 				elv_merged_request(q, req, el_ret);
 			goto out_unlock;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 5684df6848bc..d0ba50533668 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1699,18 +1699,11 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
 
 	/*
 	 * Lookup the cfqq that this bio will be queued with and allow
-	 * merge only if rq is queued there.  This function can be called
-	 * from plug merge without queue_lock.  In such cases, ioc of @rq
-	 * and %current are guaranteed to be equal.  Avoid lookup which
-	 * requires queue_lock by using @rq's cic.
+	 * merge only if rq is queued there.
 	 */
-	if (current->io_context == RQ_CIC(rq)->icq.ioc) {
-		cic = RQ_CIC(rq);
-	} else {
-		cic = cfq_cic_lookup(cfqd, current->io_context);
-		if (!cic)
-			return false;
-	}
+	cic = cfq_cic_lookup(cfqd, current->io_context);
+	if (!cic)
+		return false;
 
 	cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
 	return cfqq == RQ_CFQQ(rq);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index d6dfb65c8885..7d4e0356f329 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -42,12 +42,6 @@ struct elevator_ops
 	elevator_merged_fn *elevator_merged_fn;
 	elevator_merge_req_fn *elevator_merge_req_fn;
 	elevator_allow_merge_fn *elevator_allow_merge_fn;
-
-	/*
-	 * Used for both plugged list and elevator merging and in the
-	 * former case called without queue_lock.  Read comment on top of
-	 * attempt_plug_merge() for details.
-	 */
 	elevator_bio_merged_fn *elevator_bio_merged_fn;
 
 	elevator_dispatch_fn *elevator_dispatch_fn;
-- 
cgit v1.2.3


From cdccaa9467b982d57b139818d15e1e994feca372 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 8 Feb 2012 20:03:14 +0100
Subject: cdrom: move shared static to cdrom_device_info

The keeplocked variable in the cdrom driver is shared across multiple
drives, but set in per-device ioctls.  Move it to the per-device struct,
avoiding that the setting on one drive affects the driver's behavior
when closing another.

[ Impact: limit udev's confusion to one drive when a CD burning program
  unlocks the CD door at the end of burning. ]

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/cdrom/cdrom.c | 12 +++++-------
 include/linux/cdrom.h |  3 ++-
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 8fefe59f52a7..d620b4495745 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -286,8 +286,6 @@
 
 /* used to tell the module to turn on full debugging messages */
 static bool debug;
-/* used to keep tray locked at all times */
-static int keeplocked;
 /* default compatibility mode */
 static bool autoclose=1;
 static bool autoeject;
@@ -1204,7 +1202,7 @@ void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode)
 		cdinfo(CD_CLOSE, "Use count for \"/dev/%s\" now zero\n", cdi->name);
 		cdrom_dvd_rw_close_write(cdi);
 
-		if ((cdo->capability & CDC_LOCK) && !keeplocked) {
+		if ((cdo->capability & CDC_LOCK) && !cdi->keeplocked) {
 			cdinfo(CD_CLOSE, "Unlocking door!\n");
 			cdo->lock_door(cdi, 0);
 		}
@@ -1371,7 +1369,7 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
 	curslot = info->hdr.curslot;
 	kfree(info);
 
-	if (cdi->use_count > 1 || keeplocked) {
+	if (cdi->use_count > 1 || cdi->keeplocked) {
 		if (slot == CDSL_CURRENT) {
 	    		return curslot;
 		} else {
@@ -2289,7 +2287,7 @@ static int cdrom_ioctl_eject(struct cdrom_device_info *cdi)
 
 	if (!CDROM_CAN(CDC_OPEN_TRAY))
 		return -ENOSYS;
-	if (cdi->use_count != 1 || keeplocked)
+	if (cdi->use_count != 1 || cdi->keeplocked)
 		return -EBUSY;
 	if (CDROM_CAN(CDC_LOCK)) {
 		int ret = cdi->ops->lock_door(cdi, 0);
@@ -2316,7 +2314,7 @@ static int cdrom_ioctl_eject_sw(struct cdrom_device_info *cdi,
 
 	if (!CDROM_CAN(CDC_OPEN_TRAY))
 		return -ENOSYS;
-	if (keeplocked)
+	if (cdi->keeplocked)
 		return -EBUSY;
 
 	cdi->options &= ~(CDO_AUTO_CLOSE | CDO_AUTO_EJECT);
@@ -2447,7 +2445,7 @@ static int cdrom_ioctl_lock_door(struct cdrom_device_info *cdi,
 	if (!CDROM_CAN(CDC_LOCK))
 		return -EDRIVE_CANT_DO_THIS;
 
-	keeplocked = arg ? 1 : 0;
+	cdi->keeplocked = arg ? 1 : 0;
 
 	/*
 	 * Don't unlock the door on multiple opens by default, but allow
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index 35eae4b67503..7c48029dffe6 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -952,7 +952,8 @@ struct cdrom_device_info {
     	char name[20];                  /* name of the device type */
 /* per-device flags */
         __u8 sanyo_slot		: 2;	/* Sanyo 3 CD changer support */
-        __u8 reserved		: 6;	/* not used yet */
+        __u8 keeplocked		: 1;	/* CDROM_LOCKDOOR status */
+        __u8 reserved		: 5;	/* not used yet */
 	int cdda_method;		/* see flags */
 	__u8 last_sense;
 	__u8 media_written;		/* dirty flag, DVD+RW bookkeeping */
-- 
cgit v1.2.3


From d9f5343e35d9138432657202afa8e3ddb2ade360 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Thu, 5 Jan 2012 16:28:54 -0800
Subject: USB: Remove duplicate USB 3.0 hub feature #defines.

Somehow we ended up with duplicate hub feature #defines in ch11.h.
Tatyana Brokhman first created the USB 3.0 hub feature macros in 2.6.38
with commit 0eadcc09203349b11ca477ec367079b23d32ab91 "usb: USB3.0 ch11
definitions".  In 2.6.39, I modified a patch from John Youn that added
similar macros in a different place in the same file, and committed
dbe79bbe9dcb22cb3651c46f18943477141ca452 "USB 3.0 Hub Changes".

Some of the #defines used different names for the same values.  Others
used exactly the same names with the same values, like these gems:

 #define USB_PORT_FEAT_BH_PORT_RESET     28
...
 #define USB_PORT_FEAT_BH_PORT_RESET            28

According to my very geeky husband (who looked it up in the C99 spec),
it is allowed to have object-like macros with duplicate names as long as
the replacement list is exactly the same.  However, he recalled that
some compilers will give warnings when they find duplicate macros.  It's
probably best to remove the duplicates in the stable tree, so that the
code compiles for everyone.

The macros are now fixed to move the feature requests that are specific
to USB 3.0 hubs into a new section (out of the USB 2.0 hub feature
section), and use the most common macro name.

This patch should be backported to 2.6.39.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: Tatyana Brokhman <tlinder@codeaurora.org>
Cc: John Youn <johnyoun@synopsys.com>
Cc: Jamey Sharp <jamey@minilop.net>
Cc: stable@vger.kernel.org
---
 include/linux/usb/ch11.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/ch11.h b/include/linux/usb/ch11.h
index 31fdb4c6ee3d..0b83acd3360a 100644
--- a/include/linux/usb/ch11.h
+++ b/include/linux/usb/ch11.h
@@ -61,12 +61,6 @@
 #define USB_PORT_FEAT_TEST              21
 #define USB_PORT_FEAT_INDICATOR         22
 #define USB_PORT_FEAT_C_PORT_L1         23
-#define USB_PORT_FEAT_C_PORT_LINK_STATE	25
-#define USB_PORT_FEAT_C_PORT_CONFIG_ERROR 26
-#define USB_PORT_FEAT_PORT_REMOTE_WAKE_MASK 27
-#define USB_PORT_FEAT_BH_PORT_RESET     28
-#define USB_PORT_FEAT_C_BH_PORT_RESET   29
-#define USB_PORT_FEAT_FORCE_LINKPM_ACCEPT 30
 
 /*
  * Port feature selectors added by USB 3.0 spec.
@@ -75,8 +69,8 @@
 #define USB_PORT_FEAT_LINK_STATE		5
 #define USB_PORT_FEAT_U1_TIMEOUT		23
 #define USB_PORT_FEAT_U2_TIMEOUT		24
-#define USB_PORT_FEAT_C_LINK_STATE		25
-#define USB_PORT_FEAT_C_CONFIG_ERR		26
+#define USB_PORT_FEAT_C_PORT_LINK_STATE		25
+#define USB_PORT_FEAT_C_PORT_CONFIG_ERROR	26
 #define USB_PORT_FEAT_REMOTE_WAKE_MASK		27
 #define USB_PORT_FEAT_BH_PORT_RESET		28
 #define USB_PORT_FEAT_C_BH_PORT_RESET		29
-- 
cgit v1.2.3


From 2c4967f741e87cdd63de7271b97807041dccbf3b Mon Sep 17 00:00:00 2001
From: Sujit Reddy Thumma <sthumma@codeaurora.org>
Date: Sat, 4 Feb 2012 16:14:50 -0500
Subject: mmc: core: Ensure clocks are always enabled before host interaction

Ensure clocks are always enabled before any interaction with the
host controller driver. This makes sure that there is no race
between host execution and the core layer turning off clocks
in different context with clock gating framework.

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Per Forlin <per.forlin@stericsson.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c     | 19 ++++++++++++++++---
 drivers/mmc/core/host.h     | 21 ---------------------
 drivers/mmc/core/sd.c       | 22 ++++++++++++++++++----
 drivers/mmc/core/sdio_irq.c | 10 ++++++++--
 include/linux/mmc/host.h    | 19 +++++++++++++++++++
 5 files changed, 61 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index f545a3e6eb80..b3063b741df3 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -290,8 +290,11 @@ static void mmc_wait_for_req_done(struct mmc_host *host,
 static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq,
 		 bool is_first_req)
 {
-	if (host->ops->pre_req)
+	if (host->ops->pre_req) {
+		mmc_host_clk_hold(host);
 		host->ops->pre_req(host, mrq, is_first_req);
+		mmc_host_clk_release(host);
+	}
 }
 
 /**
@@ -306,8 +309,11 @@ static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq,
 static void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq,
 			 int err)
 {
-	if (host->ops->post_req)
+	if (host->ops->post_req) {
+		mmc_host_clk_hold(host);
 		host->ops->post_req(host, mrq, err);
+		mmc_host_clk_release(host);
+	}
 }
 
 /**
@@ -620,7 +626,9 @@ int mmc_host_enable(struct mmc_host *host)
 		int err;
 
 		host->en_dis_recurs = 1;
+		mmc_host_clk_hold(host);
 		err = host->ops->enable(host);
+		mmc_host_clk_release(host);
 		host->en_dis_recurs = 0;
 
 		if (err) {
@@ -640,7 +648,9 @@ static int mmc_host_do_disable(struct mmc_host *host, int lazy)
 		int err;
 
 		host->en_dis_recurs = 1;
+		mmc_host_clk_hold(host);
 		err = host->ops->disable(host, lazy);
+		mmc_host_clk_release(host);
 		host->en_dis_recurs = 0;
 
 		if (err < 0) {
@@ -1203,8 +1213,11 @@ int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage, bool cmd11
 
 	host->ios.signal_voltage = signal_voltage;
 
-	if (host->ops->start_signal_voltage_switch)
+	if (host->ops->start_signal_voltage_switch) {
+		mmc_host_clk_hold(host);
 		err = host->ops->start_signal_voltage_switch(host, &host->ios);
+		mmc_host_clk_release(host);
+	}
 
 	return err;
 }
diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
index fb8a5cd2e4a1..08a7852ade44 100644
--- a/drivers/mmc/core/host.h
+++ b/drivers/mmc/core/host.h
@@ -14,27 +14,6 @@
 
 int mmc_register_host_class(void);
 void mmc_unregister_host_class(void);
-
-#ifdef CONFIG_MMC_CLKGATE
-void mmc_host_clk_hold(struct mmc_host *host);
-void mmc_host_clk_release(struct mmc_host *host);
-unsigned int mmc_host_clk_rate(struct mmc_host *host);
-
-#else
-static inline void mmc_host_clk_hold(struct mmc_host *host)
-{
-}
-
-static inline void mmc_host_clk_release(struct mmc_host *host)
-{
-}
-
-static inline unsigned int mmc_host_clk_rate(struct mmc_host *host)
-{
-	return host->ios.clock;
-}
-#endif
-
 void mmc_host_deeper_disable(struct work_struct *work);
 
 #endif
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index c63ad03c29c7..5017f9354ce2 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -451,9 +451,11 @@ static int sd_select_driver_type(struct mmc_card *card, u8 *status)
 	 * information and let the hardware specific code
 	 * return what is possible given the options
 	 */
+	mmc_host_clk_hold(card->host);
 	drive_strength = card->host->ops->select_drive_strength(
 		card->sw_caps.uhs_max_dtr,
 		host_drv_type, card_drv_type);
+	mmc_host_clk_release(card->host);
 
 	err = mmc_sd_switch(card, 1, 2, drive_strength, status);
 	if (err)
@@ -660,9 +662,12 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
 		goto out;
 
 	/* SPI mode doesn't define CMD19 */
-	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning)
+	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning) {
+		mmc_host_clk_hold(card->host);
 		err = card->host->ops->execute_tuning(card->host,
 						      MMC_SEND_TUNING_BLOCK);
+		mmc_host_clk_release(card->host);
+	}
 
 out:
 	kfree(status);
@@ -850,8 +855,11 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card,
 	if (!reinit) {
 		int ro = -1;
 
-		if (host->ops->get_ro)
+		if (host->ops->get_ro) {
+			mmc_host_clk_hold(card->host);
 			ro = host->ops->get_ro(host);
+			mmc_host_clk_release(card->host);
+		}
 
 		if (ro < 0) {
 			pr_warning("%s: host does not "
@@ -967,8 +975,11 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 		 * Since initialization is now complete, enable preset
 		 * value registers for UHS-I cards.
 		 */
-		if (host->ops->enable_preset_value)
+		if (host->ops->enable_preset_value) {
+			mmc_host_clk_hold(card->host);
 			host->ops->enable_preset_value(host, true);
+			mmc_host_clk_release(card->host);
+		}
 	} else {
 		/*
 		 * Attempt to change to high-speed (if supported)
@@ -1151,8 +1162,11 @@ int mmc_attach_sd(struct mmc_host *host)
 		return err;
 
 	/* Disable preset value enable if already set since last time */
-	if (host->ops->enable_preset_value)
+	if (host->ops->enable_preset_value) {
+		mmc_host_clk_hold(host);
 		host->ops->enable_preset_value(host, false);
+		mmc_host_clk_release(host);
+	}
 
 	err = mmc_send_app_op_cond(host, 0, &ocr);
 	if (err)
diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c
index 68f81b9ee0fb..f573e7f9f740 100644
--- a/drivers/mmc/core/sdio_irq.c
+++ b/drivers/mmc/core/sdio_irq.c
@@ -146,15 +146,21 @@ static int sdio_irq_thread(void *_host)
 		}
 
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (host->caps & MMC_CAP_SDIO_IRQ)
+		if (host->caps & MMC_CAP_SDIO_IRQ) {
+			mmc_host_clk_hold(host);
 			host->ops->enable_sdio_irq(host, 1);
+			mmc_host_clk_release(host);
+		}
 		if (!kthread_should_stop())
 			schedule_timeout(period);
 		set_current_state(TASK_RUNNING);
 	} while (!kthread_should_stop());
 
-	if (host->caps & MMC_CAP_SDIO_IRQ)
+	if (host->caps & MMC_CAP_SDIO_IRQ) {
+		mmc_host_clk_hold(host);
 		host->ops->enable_sdio_irq(host, 0);
+		mmc_host_clk_release(host);
+	}
 
 	pr_debug("%s: IRQ thread exiting with code %d\n",
 		 mmc_hostname(host), ret);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0beba1e5e1ed..73e5ee8e9ca2 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -444,4 +444,23 @@ static inline int mmc_boot_partition_access(struct mmc_host *host)
 	return !(host->caps2 & MMC_CAP2_BOOTPART_NOACC);
 }
 
+#ifdef CONFIG_MMC_CLKGATE
+void mmc_host_clk_hold(struct mmc_host *host);
+void mmc_host_clk_release(struct mmc_host *host);
+unsigned int mmc_host_clk_rate(struct mmc_host *host);
+
+#else
+static inline void mmc_host_clk_hold(struct mmc_host *host)
+{
+}
+
+static inline void mmc_host_clk_release(struct mmc_host *host)
+{
+}
+
+static inline unsigned int mmc_host_clk_rate(struct mmc_host *host)
+{
+	return host->ios.clock;
+}
+#endif
 #endif /* LINUX_MMC_HOST_H */
-- 
cgit v1.2.3


From 6e8201f57c9359c9c5dc8f9805c15a4392492a10 Mon Sep 17 00:00:00 2001
From: Jaehoon Chung <jh80.chung@samsung.com>
Date: Mon, 16 Jan 2012 17:49:01 +0900
Subject: mmc: core: add the capability for broken voltage

There is an understood mismatch between the voltage the host controller is
set to and the voltage supplied to the card by a fixed voltage regulator.
Teaching the driver to accept the mismatch is overly complicated.  Instead
just accept the regulator's voltage.

This patch adds MMC_CAP2_BROKEN_VOLTAGE.

If the voltage didn't satisfy between min_uV and max_uV, try to change
the voltage in core.c.  When changing the voltage, maybe use
regulator_set_voltage().

In regulator_set_voltage(), check the below condition.

	/* sanity check */
	if (!rdev->desc->ops->set_voltage &&
	    !rdev->desc->ops->set_voltage_sel) {
		ret = -EINVAL;
		goto out;
	}

If some board should use the fixed-regulator, always return -EINVAL.
Then, eMMC didn't initialize always.

So if use a fixed-regulator, we need to add the MMC_CAP2_BROKEN_VOLTAGE.

Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  | 4 ++++
 include/linux/mmc/host.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index b3063b741df3..18661554e79c 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1131,6 +1131,10 @@ int mmc_regulator_set_ocr(struct mmc_host *mmc,
 		 * might not allow this operation
 		 */
 		voltage = regulator_get_voltage(supply);
+
+		if (mmc->caps2 & MMC_CAP2_BROKEN_VOLTAGE)
+			min_uV = max_uV = voltage;
+
 		if (voltage < 0)
 			result = voltage;
 		else if (voltage < min_uV || voltage > max_uV)
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 73e5ee8e9ca2..ee2b0363c040 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -257,6 +257,7 @@ struct mmc_host {
 #define MMC_CAP2_HS200_1_2V_SDR	(1 << 6)        /* can support */
 #define MMC_CAP2_HS200		(MMC_CAP2_HS200_1_8V_SDR | \
 				 MMC_CAP2_HS200_1_2V_SDR)
+#define MMC_CAP2_BROKEN_VOLTAGE	(1 << 7)	/* Use the broken voltage */
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 	unsigned int        power_notify_type;
-- 
cgit v1.2.3


From 3e73c36b4dc224529d0b0c0d5d69c0dacd793c42 Mon Sep 17 00:00:00 2001
From: Girish K S <girish.shivananjappa@linaro.org>
Date: Tue, 31 Jan 2012 15:44:03 +0530
Subject: mmc: core: Fix PowerOff Notify suspend/resume

Modified the mmc_poweroff to resume before sending the poweroff
notification command. In sleep mode only AWAKE and RESET commands are
allowed, so before sending the poweroff notification command resume from
sleep mode and then send the notification command.

PowerOff Notify is tested on a Synopsis Designware Host Controller
(eMMC 4.5). The suspend to RAM and resume works fine.

Signed-off-by: Girish K S <girish.shivananjappa@linaro.org>
Tested-by: Girish K S <girish.shivananjappa@linaro.org>
Reviewed-by: Saugata Das <saugata.das@linaro.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/core.c  | 26 +++++++++++++++++++-------
 drivers/mmc/core/mmc.c   | 17 ++++++++++++-----
 include/linux/mmc/card.h |  4 ++++
 3 files changed, 35 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 18661554e79c..690255c7d4dc 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1256,6 +1256,7 @@ static void mmc_poweroff_notify(struct mmc_host *host)
 	int err = 0;
 
 	card = host->card;
+	mmc_claim_host(host);
 
 	/*
 	 * Send power notify command only if card
@@ -1286,6 +1287,7 @@ static void mmc_poweroff_notify(struct mmc_host *host)
 		/* Set the card state to no notification after the poweroff */
 		card->poweroff_notify_state = MMC_NO_POWER_NOTIFICATION;
 	}
+	mmc_release_host(host);
 }
 
 /*
@@ -1344,12 +1346,28 @@ static void mmc_power_up(struct mmc_host *host)
 
 void mmc_power_off(struct mmc_host *host)
 {
+	int err = 0;
 	mmc_host_clk_hold(host);
 
 	host->ios.clock = 0;
 	host->ios.vdd = 0;
 
-	mmc_poweroff_notify(host);
+	/*
+	 * For eMMC 4.5 device send AWAKE command before
+	 * POWER_OFF_NOTIFY command, because in sleep state
+	 * eMMC 4.5 devices respond to only RESET and AWAKE cmd
+	 */
+	if (host->card && mmc_card_is_sleep(host->card) &&
+	    host->bus_ops->resume) {
+		err = host->bus_ops->resume(host);
+
+		if (!err)
+			mmc_poweroff_notify(host);
+		else
+			pr_warning("%s: error %d during resume "
+				   "(continue with poweroff sequence)\n",
+				   mmc_hostname(host), err);
+	}
 
 	/*
 	 * Reset ocr mask to be the highest possible voltage supported for
@@ -2403,12 +2421,6 @@ int mmc_suspend_host(struct mmc_host *host)
 		 */
 		if (mmc_try_claim_host(host)) {
 			if (host->bus_ops->suspend) {
-				/*
-				 * For eMMC 4.5 device send notify command
-				 * before sleep, because in sleep state eMMC 4.5
-				 * devices respond to only RESET and AWAKE cmd
-				 */
-				mmc_poweroff_notify(host);
 				err = host->bus_ops->suspend(host);
 			}
 			mmc_do_release_host(host);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index dd3fcac684a3..9be031934e33 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1316,11 +1316,13 @@ static int mmc_suspend(struct mmc_host *host)
 	BUG_ON(!host->card);
 
 	mmc_claim_host(host);
-	if (mmc_card_can_sleep(host))
+	if (mmc_card_can_sleep(host)) {
 		err = mmc_card_sleep(host);
-	else if (!mmc_host_is_spi(host))
+		if (!err)
+			mmc_card_set_sleep(host->card);
+	} else if (!mmc_host_is_spi(host))
 		mmc_deselect_cards(host);
-	host->card->state &= ~MMC_STATE_HIGHSPEED;
+	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
 	mmc_release_host(host);
 
 	return err;
@@ -1340,7 +1342,11 @@ static int mmc_resume(struct mmc_host *host)
 	BUG_ON(!host->card);
 
 	mmc_claim_host(host);
-	err = mmc_init_card(host, host->ocr, host->card);
+	if (mmc_card_is_sleep(host->card)) {
+		err = mmc_card_awake(host);
+		mmc_card_clr_sleep(host->card);
+	} else
+		err = mmc_init_card(host, host->ocr, host->card);
 	mmc_release_host(host);
 
 	return err;
@@ -1350,7 +1356,8 @@ static int mmc_power_restore(struct mmc_host *host)
 {
 	int ret;
 
-	host->card->state &= ~MMC_STATE_HIGHSPEED;
+	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
+	mmc_card_clr_sleep(host->card);
 	mmc_claim_host(host);
 	ret = mmc_init_card(host, host->ocr, host->card);
 	mmc_release_host(host);
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 9f22ba572de0..19a41d1737af 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -217,6 +217,7 @@ struct mmc_card {
 #define MMC_CARD_SDXC		(1<<6)		/* card is SDXC */
 #define MMC_CARD_REMOVED	(1<<7)		/* card has been removed */
 #define MMC_STATE_HIGHSPEED_200	(1<<8)		/* card is in HS200 mode */
+#define MMC_STATE_SLEEP		(1<<9)		/* card is in sleep state */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -382,6 +383,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_sd_card_uhs(c)	((c)->state & MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
 #define mmc_card_removed(c)	((c) && ((c)->state & MMC_CARD_REMOVED))
+#define mmc_card_is_sleep(c)	((c)->state & MMC_STATE_SLEEP)
 
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
 #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
@@ -393,7 +395,9 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 #define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
 #define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED)
+#define mmc_card_set_sleep(c)	((c)->state |= MMC_STATE_SLEEP)
 
+#define mmc_card_clr_sleep(c)	((c)->state &= ~MMC_STATE_SLEEP)
 /*
  * Quirk add/remove for MMC products.
  */
-- 
cgit v1.2.3


From f9c2a0dc42a6938ff2a80e55ca2bbd1d5581c72e Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Thu, 9 Feb 2012 14:32:43 +0900
Subject: mmc: dw_mmc: Fix PIO mode with support of highmem

Current PIO mode makes a kernel crash with CONFIG_HIGHMEM.
Highmem pages have a NULL from sg_virt(sg).
This patch fixes the following problem.

Unable to handle kernel NULL pointer dereference at virtual address 00000000
pgd = c0004000
[00000000] *pgd=00000000
Internal error: Oops: 817 [#1] PREEMPT SMP
Modules linked in:
CPU: 0    Not tainted  (3.0.15-01423-gdbf465f #589)
PC is at dw_mci_pull_data32+0x4c/0x9c
LR is at dw_mci_read_data_pio+0x54/0x1f0
pc : [<c0358824>]    lr : [<c035988c>]    psr: 20000193
sp : c0619d48  ip : c0619d70  fp : c0619d6c
r10: 00000000  r9 : 00000002  r8 : 00001000
r7 : 00000200  r6 : 00000000  r5 : e1dd3100  r4 : 00000000
r3 : 65622023  r2 : 0000007f  r1 : eeb96000  r0 : e1dd3100
Flags: nzCv  IRQs off  FIQs on  Mode SVC_32  ISA ARM  Segment
xkernel
Control: 10c5387d  Table: 61e2004a  DAC: 00000015
Process swapper (pid: 0, stack limit = 0xc06182f0)
Stack: (0xc0619d48 to 0xc061a000)
9d40:                   e1dd3100 e1a4f000 00000000 e1dd3100 e1a4f000 00000200
9d60: c0619da4 c0619d70 c035988c c03587e4 c0619d9c e18158f4 e1dd3100 e1dd3100
9d80: 00000020 00000000 00000000 00000020 c06e8a84 00000000 c0619e04 c0619da8
9da0: c0359b24 c0359844 e18158f4 e1dd3164 e1dd3168 e1dd3150 3d02fc79 e1dd3154
9dc0: e1dd3178 00000000 00000020 00000000 e1dd3150 00000000 c10dd7e8 e1a84900
9de0: c061e7cc 00000000 00000000 0000008d c06e8a84 c061e780 c0619e4c c0619e08
9e00: c00c4738 c0359a34 3d02fc79 00000000 c0619e4c c05a1698 c05a1670 c05a165c
9e20: c04de8b0 c061e780 c061e7cc e1a84900 ffffed68 0000008d c0618000 00000000
9e40: c0619e6c c0619e50 c00c48b4 c00c46c8 c061e780 c00423ac c061e7cc ffffed68
9e60: c0619e8c c0619e70 c00c7358 c00c487c 0000008d ffffee38 c0618000 ffffed68
9e80: c0619ea4 c0619e90 c00c4258 c00c72b0 c00423ac ffffee38 c0619ecc c0619ea8
9ea0: c004241c c00c4234 ffffffff f8810000 0000006d 00000002 00000001 7fffffff
9ec0: c0619f44 c0619ed0 c0048bc0 c00423c4 220ae7a9 00000000 386f0d30 0005d3a4
9ee0: c00423ac c10dd0b8 c06f2cd8 c0618000 c0594778 c003a674 7fffffff c0619f44
9f00: 386f0d30 c0619f18 c00a6f94 c005be3c 80000013 ffffffff 386f0d30 0005d3a4
9f20: 386f0d30 0005d2d1 c10dd0a8 c10dd0b8 c06f2cd8 c0618000 c0619f74 c0619f48
9f40: c0345858 c005be00 c00a2440 c0618000 c0618000 c00410d8 c06c1944 c00410fc
9f60: c0594778 c003a674 c0619f9c c0619f78 c004a7e8 c03457b4 c0618000 c06c18f8
9f80: 00000000 c0039c70 c06c18d4 c003a674 c0619fb4 c0619fa0 c04ceafc c004a714
9fa0: c06287b4 c06c18f8 c0619ff4 c0619fb8 c0008b68 c04cea68 c0008578 00000000
9fc0: 00000000 c003a674 00000000 10c5387d c0628658 c003aa78 c062f1c4 4000406a
9fe0: 413fc090 00000000 00000000 c0619ff8 40008044 c0008858 00000000 00000000
Backtrace:
[<c03587d8>] (dw_mci_pull_data32+0x0/0x9c) from [<c035988c>] (dw_mci_read_data_pio+0x54/0x1f0)
 r6:00000200 r5:e1a4f000 r4:e1dd3100
 [<c0359838>] (dw_mci_read_data_pio+0x0/0x1f0) from [<c0359b24>] (dw_mci_interrupt+0xfc/0x4a4)
[<c0359a28>] (dw_mci_interrupt+0x0/0x4a4) from [<c00c4738>] (handle_irq_event_percpu+0x7c/0x1b4)
[<c00c46bc>] (handle_irq_event_percpu+0x0/0x1b4) from [<c00c48b4>] (handle_irq_event+0x44/0x64)
[<c00c4870>] (handle_irq_event+0x0/0x64) from [<c00c7358>] (handle_fasteoi_irq+0xb4/0x124)
 r7:ffffed68 r6:c061e7cc r5:c00423ac r4:c061e780
 [<c00c72a4>] (handle_fasteoi_irq+0x0/0x124) from [<c00c4258>] (generic_handle_irq+0x30/0x38)
 r7:ffffed68 r6:c0618000 r5:ffffee38 r4:0000008d
 [<c00c4228>] (generic_handle_irq+0x0/0x38) from [<c004241c>] (asm_do_IRQ+0x64/0xe0)
 r5:ffffee38 r4:c00423ac
 [<c00423b8>] (asm_do_IRQ+0x0/0xe0) from [<c0048bc0>] (__irq_svc+0x80/0x14c)
Exception stack(0xc0619ed0 to 0xc0619f18)

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Acked-by: Will Newton <will.newton@imgtec.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/host/dw_mmc.c  | 144 +++++++++++++++++++++++----------------------
 include/linux/mmc/dw_mmc.h |   6 +-
 2 files changed, 79 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 0e342793ff14..8bec1c36b159 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -22,7 +22,6 @@
 #include <linux/ioport.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-#include <linux/scatterlist.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
@@ -502,8 +501,14 @@ static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data)
 		host->dir_status = DW_MCI_SEND_STATUS;
 
 	if (dw_mci_submit_data_dma(host, data)) {
+		int flags = SG_MITER_ATOMIC;
+		if (host->data->flags & MMC_DATA_READ)
+			flags |= SG_MITER_TO_SG;
+		else
+			flags |= SG_MITER_FROM_SG;
+
+		sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags);
 		host->sg = data->sg;
-		host->pio_offset = 0;
 		host->part_buf_start = 0;
 		host->part_buf_count = 0;
 
@@ -972,6 +977,7 @@ static void dw_mci_tasklet_func(unsigned long priv)
 				 * generates a block interrupt, hence setting
 				 * the scatter-gather pointer to NULL.
 				 */
+				sg_miter_stop(&host->sg_miter);
 				host->sg = NULL;
 				ctrl = mci_readl(host, CTRL);
 				ctrl |= SDMMC_CTRL_FIFO_RESET;
@@ -1311,54 +1317,44 @@ static void dw_mci_pull_data(struct dw_mci *host, void *buf, int cnt)
 
 static void dw_mci_read_data_pio(struct dw_mci *host)
 {
-	struct scatterlist *sg = host->sg;
-	void *buf = sg_virt(sg);
-	unsigned int offset = host->pio_offset;
+	struct sg_mapping_iter *sg_miter = &host->sg_miter;
+	void *buf;
+	unsigned int offset;
 	struct mmc_data	*data = host->data;
 	int shift = host->data_shift;
 	u32 status;
 	unsigned int nbytes = 0, len;
+	unsigned int remain, fcnt;
 
 	do {
-		len = host->part_buf_count +
-			(SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift);
-		if (offset + len <= sg->length) {
+		if (!sg_miter_next(sg_miter))
+			goto done;
+
+		host->sg = sg_miter->__sg;
+		buf = sg_miter->addr;
+		remain = sg_miter->length;
+		offset = 0;
+
+		do {
+			fcnt = (SDMMC_GET_FCNT(mci_readl(host, STATUS))
+					<< shift) + host->part_buf_count;
+			len = min(remain, fcnt);
+			if (!len)
+				break;
 			dw_mci_pull_data(host, (void *)(buf + offset), len);
-
 			offset += len;
 			nbytes += len;
-
-			if (offset == sg->length) {
-				flush_dcache_page(sg_page(sg));
-				host->sg = sg = sg_next(sg);
-				if (!sg)
-					goto done;
-
-				offset = 0;
-				buf = sg_virt(sg);
-			}
-		} else {
-			unsigned int remaining = sg->length - offset;
-			dw_mci_pull_data(host, (void *)(buf + offset),
-					 remaining);
-			nbytes += remaining;
-
-			flush_dcache_page(sg_page(sg));
-			host->sg = sg = sg_next(sg);
-			if (!sg)
-				goto done;
-
-			offset = len - remaining;
-			buf = sg_virt(sg);
-			dw_mci_pull_data(host, buf, offset);
-			nbytes += offset;
-		}
+			remain -= len;
+		} while (remain);
+		sg_miter->consumed = offset;
 
 		status = mci_readl(host, MINTSTS);
 		mci_writel(host, RINTSTS, SDMMC_INT_RXDR);
 		if (status & DW_MCI_DATA_ERROR_FLAGS) {
 			host->data_status = status;
 			data->bytes_xfered += nbytes;
+			sg_miter_stop(sg_miter);
+			host->sg = NULL;
 			smp_wmb();
 
 			set_bit(EVENT_DATA_ERROR, &host->pending_events);
@@ -1367,65 +1363,66 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
 			return;
 		}
 	} while (status & SDMMC_INT_RXDR); /*if the RXDR is ready read again*/
-	host->pio_offset = offset;
 	data->bytes_xfered += nbytes;
+
+	if (!remain) {
+		if (!sg_miter_next(sg_miter))
+			goto done;
+		sg_miter->consumed = 0;
+	}
+	sg_miter_stop(sg_miter);
 	return;
 
 done:
 	data->bytes_xfered += nbytes;
+	sg_miter_stop(sg_miter);
+	host->sg = NULL;
 	smp_wmb();
 	set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
 }
 
 static void dw_mci_write_data_pio(struct dw_mci *host)
 {
-	struct scatterlist *sg = host->sg;
-	void *buf = sg_virt(sg);
-	unsigned int offset = host->pio_offset;
+	struct sg_mapping_iter *sg_miter = &host->sg_miter;
+	void *buf;
+	unsigned int offset;
 	struct mmc_data	*data = host->data;
 	int shift = host->data_shift;
 	u32 status;
 	unsigned int nbytes = 0, len;
+	unsigned int fifo_depth = host->fifo_depth;
+	unsigned int remain, fcnt;
 
 	do {
-		len = ((host->fifo_depth -
-			SDMMC_GET_FCNT(mci_readl(host, STATUS))) << shift)
-			- host->part_buf_count;
-		if (offset + len <= sg->length) {
+		if (!sg_miter_next(sg_miter))
+			goto done;
+
+		host->sg = sg_miter->__sg;
+		buf = sg_miter->addr;
+		remain = sg_miter->length;
+		offset = 0;
+
+		do {
+			fcnt = ((fifo_depth -
+				 SDMMC_GET_FCNT(mci_readl(host, STATUS)))
+					<< shift) - host->part_buf_count;
+			len = min(remain, fcnt);
+			if (!len)
+				break;
 			host->push_data(host, (void *)(buf + offset), len);
-
 			offset += len;
 			nbytes += len;
-			if (offset == sg->length) {
-				host->sg = sg = sg_next(sg);
-				if (!sg)
-					goto done;
-
-				offset = 0;
-				buf = sg_virt(sg);
-			}
-		} else {
-			unsigned int remaining = sg->length - offset;
-
-			host->push_data(host, (void *)(buf + offset),
-					remaining);
-			nbytes += remaining;
-
-			host->sg = sg = sg_next(sg);
-			if (!sg)
-				goto done;
-
-			offset = len - remaining;
-			buf = sg_virt(sg);
-			host->push_data(host, (void *)buf, offset);
-			nbytes += offset;
-		}
+			remain -= len;
+		} while (remain);
+		sg_miter->consumed = offset;
 
 		status = mci_readl(host, MINTSTS);
 		mci_writel(host, RINTSTS, SDMMC_INT_TXDR);
 		if (status & DW_MCI_DATA_ERROR_FLAGS) {
 			host->data_status = status;
 			data->bytes_xfered += nbytes;
+			sg_miter_stop(sg_miter);
+			host->sg = NULL;
 
 			smp_wmb();
 
@@ -1435,12 +1432,20 @@ static void dw_mci_write_data_pio(struct dw_mci *host)
 			return;
 		}
 	} while (status & SDMMC_INT_TXDR); /* if TXDR write again */
-	host->pio_offset = offset;
 	data->bytes_xfered += nbytes;
+
+	if (!remain) {
+		if (!sg_miter_next(sg_miter))
+			goto done;
+		sg_miter->consumed = 0;
+	}
+	sg_miter_stop(sg_miter);
 	return;
 
 done:
 	data->bytes_xfered += nbytes;
+	sg_miter_stop(sg_miter);
+	host->sg = NULL;
 	smp_wmb();
 	set_bit(EVENT_XFER_COMPLETE, &host->pending_events);
 }
@@ -1643,6 +1648,7 @@ static void dw_mci_work_routine_card(struct work_struct *work)
 				 * block interrupt, hence setting the
 				 * scatter-gather pointer to NULL.
 				 */
+				sg_miter_stop(&host->sg_miter);
 				host->sg = NULL;
 
 				ctrl = mci_readl(host, CTRL);
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index e8779c6d1759..aae5d1f1bb39 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -14,6 +14,8 @@
 #ifndef LINUX_MMC_DW_MMC_H
 #define LINUX_MMC_DW_MMC_H
 
+#include <linux/scatterlist.h>
+
 #define MAX_MCI_SLOTS	2
 
 enum dw_mci_state {
@@ -40,7 +42,7 @@ struct mmc_data;
  * @lock: Spinlock protecting the queue and associated data.
  * @regs: Pointer to MMIO registers.
  * @sg: Scatterlist entry currently being processed by PIO code, if any.
- * @pio_offset: Offset into the current scatterlist entry.
+ * @sg_miter: PIO mapping scatterlist iterator.
  * @cur_slot: The slot which is currently using the controller.
  * @mrq: The request currently being processed on @cur_slot,
  *	or NULL if the controller is idle.
@@ -115,7 +117,7 @@ struct dw_mci {
 	void __iomem		*regs;
 
 	struct scatterlist	*sg;
-	unsigned int		pio_offset;
+	struct sg_mapping_iter	sg_miter;
 
 	struct dw_mci_slot	*cur_slot;
 	struct mmc_request	*mrq;
-- 
cgit v1.2.3


From 6b6dc836a195e077e76977b6c020a73de411b46d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 10 Feb 2012 11:03:00 +0100
Subject: vfs: Provide function to get superblock and wait for it to thaw

In quota code we need to find a superblock corresponding to a device and wait
for superblock to be unfrozen. However this waiting has to happen without
s_umount semaphore because that is required for superblock to thaw. So provide
a function in VFS for this to keep dances with s_umount where they belong.

[AV: implementation switched to saner variant]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c         | 22 ++++++++++++++++++++++
 include/linux/fs.h |  1 +
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/fs/super.c b/fs/super.c
index 6015c02296b7..6277ec6cb60a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -633,6 +633,28 @@ rescan:
 
 EXPORT_SYMBOL(get_super);
 
+/**
+ *	get_super_thawed - get thawed superblock of a device
+ *	@bdev: device to get the superblock for
+ *
+ *	Scans the superblock list and finds the superblock of the file system
+ *	mounted on the device. The superblock is returned once it is thawed
+ *	(or immediately if it was not frozen). %NULL is returned if no match
+ *	is found.
+ */
+struct super_block *get_super_thawed(struct block_device *bdev)
+{
+	while (1) {
+		struct super_block *s = get_super(bdev);
+		if (!s || s->s_frozen == SB_UNFROZEN)
+			return s;
+		up_read(&s->s_umount);
+		vfs_check_frozen(s, SB_FREEZE_WRITE);
+		put_super(s);
+	}
+}
+EXPORT_SYMBOL(get_super_thawed);
+
 /**
  * get_active_super - get an active reference to the superblock of a device
  * @bdev: device to get the superblock for
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 386da09f229d..69cd5bb640f5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2496,6 +2496,7 @@ extern void get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern struct super_block *get_super(struct block_device *);
+extern struct super_block *get_super_thawed(struct block_device *);
 extern struct super_block *get_active_super(struct block_device *bdev);
 extern void drop_super(struct super_block *sb);
 extern void iterate_supers(void (*)(struct super_block *, void *), void *);
-- 
cgit v1.2.3


From f2ea0f5f04c97b48c88edccba52b0682fbe45087 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 14 Jan 2012 21:44:49 +0300
Subject: crypto: sha512 - use standard ror64()

Use standard ror64() instead of hand-written.
There is no standard ror64, so create it.

The difference is shift value being "unsigned int" instead of uint64_t
(for which there is no reason). gcc starts to emit native ROR instructions
which it doesn't do for some reason currently. This should make the code
faster.

Patch survives in-tree crypto test and ping flood with hmac(sha512) on.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/sha512_generic.c | 13 ++++---------
 include/linux/bitops.h  | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index f04af931a682..107f6f7be5e1 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -31,11 +31,6 @@ static inline u64 Maj(u64 x, u64 y, u64 z)
         return (x & y) | (z & (x | y));
 }
 
-static inline u64 RORu64(u64 x, u64 y)
-{
-        return (x >> y) | (x << (64 - y));
-}
-
 static const u64 sha512_K[80] = {
         0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL,
         0xe9b5dba58189dbbcULL, 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
@@ -66,10 +61,10 @@ static const u64 sha512_K[80] = {
         0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL,
 };
 
-#define e0(x)       (RORu64(x,28) ^ RORu64(x,34) ^ RORu64(x,39))
-#define e1(x)       (RORu64(x,14) ^ RORu64(x,18) ^ RORu64(x,41))
-#define s0(x)       (RORu64(x, 1) ^ RORu64(x, 8) ^ (x >> 7))
-#define s1(x)       (RORu64(x,19) ^ RORu64(x,61) ^ (x >> 6))
+#define e0(x)       (ror64(x,28) ^ ror64(x,34) ^ ror64(x,39))
+#define e1(x)       (ror64(x,14) ^ ror64(x,18) ^ ror64(x,41))
+#define s0(x)       (ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7))
+#define s1(x)       (ror64(x,19) ^ ror64(x,61) ^ (x >> 6))
 
 static inline void LOAD_OP(int I, u64 *W, const u8 *input)
 {
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a3ef66a2a083..fc8a3ffce320 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -49,6 +49,26 @@ static inline unsigned long hweight_long(unsigned long w)
 	return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
 }
 
+/**
+ * rol64 - rotate a 64-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u64 rol64(__u64 word, unsigned int shift)
+{
+	return (word << shift) | (word >> (64 - shift));
+}
+
+/**
+ * ror64 - rotate a 64-bit value right
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u64 ror64(__u64 word, unsigned int shift)
+{
+	return (word >> shift) | (word << (64 - shift));
+}
+
 /**
  * rol32 - rotate a 32-bit value left
  * @word: value to rotate
-- 
cgit v1.2.3


From 59cca653a601372e9b4a430d867377a3e4a36d76 Mon Sep 17 00:00:00 2001
From: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Date: Thu, 2 Feb 2012 10:46:49 +0200
Subject: digsig: changed type of the timestamp

time_t was used in the signature and key packet headers,
which is typedef of long and is different on 32 and 64 bit architectures.
Signature and key format should be independent of architecture.
Similar to GPG, I have changed the type to uint32_t.

Signed-off-by: Dmitry Kasatkin <dmitry.kasatkin@intel.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/digsig.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/digsig.h b/include/linux/digsig.h
index b01558b15814..6f85a070bb45 100644
--- a/include/linux/digsig.h
+++ b/include/linux/digsig.h
@@ -30,7 +30,7 @@ enum digest_algo {
 
 struct pubkey_hdr {
 	uint8_t		version;	/* key format version */
-	time_t		timestamp;	/* key made, always 0 for now */
+	uint32_t	timestamp;	/* key made, always 0 for now */
 	uint8_t		algo;
 	uint8_t		nmpi;
 	char		mpi[0];
@@ -38,7 +38,7 @@ struct pubkey_hdr {
 
 struct signature_hdr {
 	uint8_t		version;	/* signature format version */
-	time_t		timestamp;	/* signature made */
+	uint32_t	timestamp;	/* signature made */
 	uint8_t		algo;
 	uint8_t		hash;
 	uint8_t		keyid[8];
-- 
cgit v1.2.3


From faf309009e2e18d30c032b7d9479f29b91677c37 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 21 Feb 2012 17:24:20 -0800
Subject: sys_poll: fix incorrect type for 'timeout' parameter

The 'poll()' system call timeout parameter is supposed to be 'int', not
'long'.

Now, the reason this matters is that right now 32-bit compat mode is
broken on at least x86-64, because the 32-bit code just calls
'sys_poll()' directly on x86-64, and the 32-bit argument will have been
zero-extended, turning a signed 'int' into a large unsigned 'long'
value.

We could just introduce a 'compat_sys_poll()' function for this, and
that may eventually be what we have to do, but since the actual standard
poll() semantics is *supposed* to be 'int', and since at least on x86-64
glibc sign-extends the argument before invocing the system call (so
nobody can actually use a 64-bit timeout value in user space _anyway_,
even in 64-bit binaries), the simpler solution would seem to be to just
fix the definition of the system call to match what it should have been
from the very start.

If it turns out that somebody somehow circumvents the user-level libc
64-bit sign extension and actually uses a large unsigned 64-bit timeout
despite that not being how poll() is supposed to work, we will need to
do the compat_sys_poll() approach.

Reported-by: Thomas Meyer <thomas@m3y3r.de>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/kernel/compat_wrapper.S | 2 +-
 fs/select.c                       | 2 +-
 include/linux/syscalls.h          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index 18c51df9fe06..ff605a39cf43 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -662,7 +662,7 @@ ENTRY(sys32_getresuid16_wrapper)
 ENTRY(sys32_poll_wrapper)
 	llgtr	%r2,%r2			# struct pollfd *
 	llgfr	%r3,%r3			# unsigned int
-	lgfr	%r4,%r4			# long
+	lgfr	%r4,%r4			# int
 	jg	sys_poll		# branch to system call
 
 ENTRY(sys32_setresgid16_wrapper)
diff --git a/fs/select.c b/fs/select.c
index d33418fdc858..e782258d0de3 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -912,7 +912,7 @@ static long do_restart_poll(struct restart_block *restart_block)
 }
 
 SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
-		long, timeout_msecs)
+		int, timeout_msecs)
 {
 	struct timespec end_time, *to = NULL;
 	int ret;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 515669fa3c1d..8ec1153ff57b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -624,7 +624,7 @@ asmlinkage long sys_socketpair(int, int, int, int __user *);
 asmlinkage long sys_socketcall(int call, unsigned long __user *args);
 asmlinkage long sys_listen(int, int);
 asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
-				long timeout);
+				int timeout);
 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			fd_set __user *exp, struct timeval __user *tvp);
 asmlinkage long sys_old_select(struct sel_arg_struct __user *arg);
-- 
cgit v1.2.3