From c37bb26654bb8981ea237076e333eb37d4aa2dc6 Mon Sep 17 00:00:00 2001
From: Dennis Munsie <dmunsie@cecropia.com>
Date: Tue, 20 Jun 2006 14:55:55 -0400
Subject: intelfb: add preliminary i2c support

[01/07] i2c: add intelfb bit algorithm id

Adds the intelfb bit algorithm id to i2c-id.h.

Signed-off-by: Dennis Munsie <dmunsie@cecropia.com>
---
 include/linux/i2c-id.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index c8b81f419fd8..cce6074dd754 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -189,6 +189,7 @@
 #define I2C_HW_B_RADEON		0x01001e /* radeon framebuffer driver */
 #define I2C_HW_B_EM28XX		0x01001f /* em28xx video capture cards */
 #define I2C_HW_B_CX2341X	0x010020 /* Conexant CX2341X MPEG encoder cards */
+#define I2C_HW_B_INTELFB	0x010021 /* intel framebuffer driver */
 
 /* --- PCF 8584 based algorithms					*/
 #define I2C_HW_P_LP		0x020000 /* Parallel port interface */
-- 
cgit v1.2.3


From 132919ba80ad207755fe271277bfefff865a54fe Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 27 Aug 2006 13:56:52 +0100
Subject: [MMC] Remove data->blksz_bits member

data->blksz_bits is unused now - remove it.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/mmc/mmc.c       | 1 -
 drivers/mmc/mmc_block.c | 1 -
 include/linux/mmc/mmc.h | 1 -
 3 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c
index 74eaaee66de0..5b9caa7978d3 100644
--- a/drivers/mmc/mmc.c
+++ b/drivers/mmc/mmc.c
@@ -996,7 +996,6 @@ static void mmc_read_scrs(struct mmc_host *host)
 
 		mmc_set_data_timeout(&data, card, 0);
 
-		data.blksz_bits = 3;
 		data.blksz = 1 << 3;
 		data.blocks = 1;
 		data.flags = MMC_DATA_READ;
diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c
index a0e0dad1b419..f3a99dd8df8f 100644
--- a/drivers/mmc/mmc_block.c
+++ b/drivers/mmc/mmc_block.c
@@ -172,7 +172,6 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 
 		brq.cmd.arg = req->sector << 9;
 		brq.cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC;
-		brq.data.blksz_bits = md->block_bits;
 		brq.data.blksz = 1 << md->block_bits;
 		brq.data.blocks = req->nr_sectors >> (md->block_bits - 9);
 		brq.stop.opcode = MMC_STOP_TRANSMISSION;
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 627e2c08ce41..a3594dfd6963 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -68,7 +68,6 @@ struct mmc_command {
 struct mmc_data {
 	unsigned int		timeout_ns;	/* data timeout (in ns, max 80ms) */
 	unsigned int		timeout_clks;	/* data timeout (in clocks) */
-	unsigned int		blksz_bits;	/* data block size */
 	unsigned int		blksz;		/* data block size */
 	unsigned int		blocks;		/* number of blocks */
 	unsigned int		error;		/* data error */
-- 
cgit v1.2.3


From db53f28b3a6d9338cca1b7e917dc063ac99e1871 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Wed, 30 Aug 2006 15:14:56 +0100
Subject: [MMC] Add multi block-write capability

Add a capability flag for drivers to set when they can perform multi-
block transfers to cards _and_ correctly report the number of bytes
transferred should an error occur.

The last point is very important - if a driver reports more bytes than
were actually accepted by the card and an error occurs, there is the
possibility for data loss.

Pierre Ossman provided the patch for wbsd and sdhci.

Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/mmc/mmc_block.c  | 27 ++++++++++++++++++++-------
 drivers/mmc/mmci.c       |  1 +
 drivers/mmc/sdhci.c      |  2 +-
 drivers/mmc/wbsd.c       |  2 +-
 include/linux/mmc/host.h |  1 +
 5 files changed, 24 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c
index f3a99dd8df8f..8d18b87bfd34 100644
--- a/drivers/mmc/mmc_block.c
+++ b/drivers/mmc/mmc_block.c
@@ -32,6 +32,7 @@
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/protocol.h>
+#include <linux/mmc/host.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -165,6 +166,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 	do {
 		struct mmc_blk_request brq;
 		struct mmc_command cmd;
+		u32 readcmd, writecmd;
 
 		memset(&brq, 0, sizeof(struct mmc_blk_request));
 		brq.mrq.cmd = &brq.cmd;
@@ -180,20 +182,31 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 
 		mmc_set_data_timeout(&brq.data, card, rq_data_dir(req) != READ);
 
-		if (rq_data_dir(req) == READ) {
-			brq.cmd.opcode = brq.data.blocks > 1 ? MMC_READ_MULTIPLE_BLOCK : MMC_READ_SINGLE_BLOCK;
-			brq.data.flags |= MMC_DATA_READ;
-		} else {
-			brq.cmd.opcode = MMC_WRITE_BLOCK;
-			brq.data.flags |= MMC_DATA_WRITE;
+		/*
+		 * If the host doesn't support multiple block writes, force
+		 * block writes to single block.
+		 */
+		if (rq_data_dir(req) != READ &&
+		    !(card->host->caps & MMC_CAP_MULTIWRITE))
 			brq.data.blocks = 1;
-		}
 
 		if (brq.data.blocks > 1) {
 			brq.data.flags |= MMC_DATA_MULTI;
 			brq.mrq.stop = &brq.stop;
+			readcmd = MMC_READ_MULTIPLE_BLOCK;
+			writecmd = MMC_WRITE_MULTIPLE_BLOCK;
 		} else {
 			brq.mrq.stop = NULL;
+			readcmd = MMC_READ_SINGLE_BLOCK;
+			writecmd = MMC_WRITE_BLOCK;
+		}
+
+		if (rq_data_dir(req) == READ) {
+			brq.cmd.opcode = readcmd;
+			brq.data.flags |= MMC_DATA_READ;
+		} else {
+			brq.cmd.opcode = writecmd;
+			brq.data.flags |= MMC_DATA_WRITE;
 		}
 
 		brq.data.sg = mq->sg;
diff --git a/drivers/mmc/mmci.c b/drivers/mmc/mmci.c
index 8419489e7744..2b5a0cc9ea56 100644
--- a/drivers/mmc/mmci.c
+++ b/drivers/mmc/mmci.c
@@ -509,6 +509,7 @@ static int mmci_probe(struct amba_device *dev, void *id)
 	mmc->f_min = (host->mclk + 511) / 512;
 	mmc->f_max = min(host->mclk, fmax);
 	mmc->ocr_avail = plat->ocr_mask;
+	mmc->caps = MMC_CAP_MULTIWRITE;
 
 	/*
 	 * We can do SGIO
diff --git a/drivers/mmc/sdhci.c b/drivers/mmc/sdhci.c
index 4e21b3b9d330..dea4edd1c434 100644
--- a/drivers/mmc/sdhci.c
+++ b/drivers/mmc/sdhci.c
@@ -1262,7 +1262,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot)
 	mmc->ops = &sdhci_ops;
 	mmc->f_min = host->max_clk / 256;
 	mmc->f_max = host->max_clk;
-	mmc->caps = MMC_CAP_4_BIT_DATA;
+	mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE;
 
 	mmc->ocr_avail = 0;
 	if (caps & SDHCI_CAN_VDD_330)
diff --git a/drivers/mmc/wbsd.c b/drivers/mmc/wbsd.c
index c351c6d1a18a..4a6617d9a49f 100644
--- a/drivers/mmc/wbsd.c
+++ b/drivers/mmc/wbsd.c
@@ -1323,7 +1323,7 @@ static int __devinit wbsd_alloc_mmc(struct device *dev)
 	mmc->f_min = 375000;
 	mmc->f_max = 24000000;
 	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
-	mmc->caps = MMC_CAP_4_BIT_DATA;
+	mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE;
 
 	spin_lock_init(&host->lock);
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index ba095aebedff..b282ec9bba08 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -85,6 +85,7 @@ struct mmc_host {
 	unsigned long		caps;		/* Host capabilities */
 
 #define MMC_CAP_4_BIT_DATA	(1 << 0)	/* Can the host do 4 bit transfers */
+#define MMC_CAP_MULTIWRITE	(1 << 1)	/* Can accurately report bytes sent to card on error */
 
 	/* host specific block data */
 	unsigned int		max_seg_size;	/* see blk_queue_max_segment_size */
-- 
cgit v1.2.3


From 42431acbac43eb47c774c29d370f5c59136805bf Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 24 Sep 2006 10:44:09 +0100
Subject: [MMC] MMC_CAP_BYTEBLOCK flag for non-log2 block sizes capable hosts

Some MMC hosts can only handle log2 block sizes.  Unfortunately,
the MMC password support needs to be able to send non-log2 block
sizes.  Provide a capability so that the MMC password support can
decide whether it should use this support or not.

The unfortunate side effect of this host limitation is that any
MMC card protected by a password which is not a log2 block size
can not be accessed on a host which only allows a log2 block size.

This change just adds the flag.  The MMC password support code
needs updating to use it (if and when it is finally submitted.)

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/mmc/at91_mci.c   | 1 +
 drivers/mmc/imxmmc.c     | 2 +-
 drivers/mmc/omap.c       | 7 ++++---
 drivers/mmc/sdhci.c      | 2 +-
 drivers/mmc/wbsd.c       | 2 +-
 include/linux/mmc/host.h | 1 +
 6 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/at91_mci.c b/drivers/mmc/at91_mci.c
index 6b7638b84290..d34b7d9d92ed 100644
--- a/drivers/mmc/at91_mci.c
+++ b/drivers/mmc/at91_mci.c
@@ -822,6 +822,7 @@ static int at91_mci_probe(struct platform_device *pdev)
 	mmc->f_min = 375000;
 	mmc->f_max = 25000000;
 	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+	mmc->caps = MMC_CAP_BYTEBLOCK;
 
 	host = mmc_priv(mmc);
 	host->mmc = mmc;
diff --git a/drivers/mmc/imxmmc.c b/drivers/mmc/imxmmc.c
index fb6565b98f32..1b79dd271aae 100644
--- a/drivers/mmc/imxmmc.c
+++ b/drivers/mmc/imxmmc.c
@@ -956,7 +956,7 @@ static int imxmci_probe(struct platform_device *pdev)
 	mmc->f_min = 150000;
 	mmc->f_max = CLK_RATE/2;
 	mmc->ocr_avail = MMC_VDD_32_33;
-	mmc->caps |= MMC_CAP_4_BIT_DATA;
+	mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_BYTEBLOCK;
 
 	/* MMC core transfer sizes tunable parameters */
 	mmc->max_hw_segs = 64;
diff --git a/drivers/mmc/omap.c b/drivers/mmc/omap.c
index ddf06b32c159..52c9e52e6b78 100644
--- a/drivers/mmc/omap.c
+++ b/drivers/mmc/omap.c
@@ -1034,13 +1034,14 @@ static int __init mmc_omap_probe(struct platform_device *pdev)
 	host->irq = pdev->resource[1].start;
 	host->base = (void __iomem*)IO_ADDRESS(r->start);
 
-	if (minfo->wire4)
-		 mmc->caps |= MMC_CAP_4_BIT_DATA;
-
 	mmc->ops = &mmc_omap_ops;
 	mmc->f_min = 400000;
 	mmc->f_max = 24000000;
 	mmc->ocr_avail = MMC_VDD_32_33|MMC_VDD_33_34;
+	mmc->caps = MMC_CAP_BYTEBLOCK;
+
+	if (minfo->wire4)
+		 mmc->caps |= MMC_CAP_4_BIT_DATA;
 
 	/* Use scatterlist DMA to reduce per-transfer costs.
 	 * NOTE max_seg_size assumption that small blocks aren't
diff --git a/drivers/mmc/sdhci.c b/drivers/mmc/sdhci.c
index dea4edd1c434..fdfc3838dd79 100644
--- a/drivers/mmc/sdhci.c
+++ b/drivers/mmc/sdhci.c
@@ -1262,7 +1262,7 @@ static int __devinit sdhci_probe_slot(struct pci_dev *pdev, int slot)
 	mmc->ops = &sdhci_ops;
 	mmc->f_min = host->max_clk / 256;
 	mmc->f_max = host->max_clk;
-	mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE;
+	mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE | MMC_CAP_BYTEBLOCK;
 
 	mmc->ocr_avail = 0;
 	if (caps & SDHCI_CAN_VDD_330)
diff --git a/drivers/mmc/wbsd.c b/drivers/mmc/wbsd.c
index 4a6617d9a49f..6435a6822ad3 100644
--- a/drivers/mmc/wbsd.c
+++ b/drivers/mmc/wbsd.c
@@ -1323,7 +1323,7 @@ static int __devinit wbsd_alloc_mmc(struct device *dev)
 	mmc->f_min = 375000;
 	mmc->f_max = 24000000;
 	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
-	mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE;
+	mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_MULTIWRITE | MMC_CAP_BYTEBLOCK;
 
 	spin_lock_init(&host->lock);
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index b282ec9bba08..587264a58d56 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -86,6 +86,7 @@ struct mmc_host {
 
 #define MMC_CAP_4_BIT_DATA	(1 << 0)	/* Can the host do 4 bit transfers */
 #define MMC_CAP_MULTIWRITE	(1 << 1)	/* Can accurately report bytes sent to card on error */
+#define MMC_CAP_BYTEBLOCK	(1 << 2)	/* Can do non-log2 block sizes */
 
 	/* host specific block data */
 	unsigned int		max_seg_size;	/* see blk_queue_max_segment_size */
-- 
cgit v1.2.3


From 1739adea321788e380794c1072c810d445090bca Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sat, 26 Aug 2006 03:05:17 -0300
Subject: V4L/DVB (4545): Add missing v4l2_buf_type to struct
 v4l2_sliced_vbi_cap.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>
---
 include/linux/videodev2.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index e3715d774197..d5746d470c73 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1135,7 +1135,8 @@ struct v4l2_sliced_vbi_cap
 				 (equals frame lines 313-336 for 625 line video
 				  standards, 263-286 for 525 line standards) */
 	__u16   service_lines[2][24];
-	__u32   reserved[4];    /* must be 0 */
+	enum v4l2_buf_type type;
+	__u32   reserved[3];    /* must be 0 */
 };
 
 struct v4l2_sliced_vbi_data
-- 
cgit v1.2.3


From 616b8b639e6491cfa63f79238a5c6fbee383eb09 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 1 Sep 2006 18:36:48 -0300
Subject: V4L/DVB (4585): VIDIOC_G_SLICED_VBI_CAP now accepts a v4l2_buf_type,
 make it IOWR

The VIDIOC_G_SLICED_VBI_CAP needs to receive the v4l2_buf_type field before
it can return a result. Hence this ioctl must be IOWR, not IOR. Since this
ioctl is still marked experimental we can make this change.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org>
---
 include/linux/videodev2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index d5746d470c73..44c59da26ed2 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1243,7 +1243,7 @@ struct v4l2_streamparm
 #define VIDIOC_G_PRIORITY       _IOR  ('V', 67, enum v4l2_priority)
 #define VIDIOC_S_PRIORITY       _IOW  ('V', 68, enum v4l2_priority)
 #if 1
-#define VIDIOC_G_SLICED_VBI_CAP _IOR  ('V', 69, struct v4l2_sliced_vbi_cap)
+#define VIDIOC_G_SLICED_VBI_CAP _IOWR ('V', 69, struct v4l2_sliced_vbi_cap)
 #endif
 #define VIDIOC_LOG_STATUS       _IO   ('V', 70)
 #define VIDIOC_G_EXT_CTRLS	_IOWR ('V', 71, struct v4l2_ext_controls)
-- 
cgit v1.2.3


From 29fa06c1292f473ae51a84f55c8fe22179bc1080 Mon Sep 17 00:00:00 2001
From: Rudolf Marek <r.marek@sh.cvut.cz>
Date: Mon, 28 Aug 2006 14:40:17 +0200
Subject: hwmon: New driver k8temp

Add support for the temperature sensor(s) found in AMD K8 CPUs.

Signed-off-by: Rudolf Marek <r.marek@sh.cvut.cz>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/hwmon/Kconfig   |  10 ++
 drivers/hwmon/Makefile  |   1 +
 drivers/hwmon/k8temp.c  | 292 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci_ids.h |   1 +
 4 files changed, 304 insertions(+)
 create mode 100644 drivers/hwmon/k8temp.c

(limited to 'include/linux')

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 78c237f8fa0a..d9f86e9d405b 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -94,6 +94,16 @@ config SENSORS_ADM9240
 	  This driver can also be built as a module.  If so, the module
 	  will be called adm9240.
 
+config SENSORS_K8TEMP
+	tristate "AMD K8 processor sensor"
+	depends on HWMON && X86 && PCI && EXPERIMENTAL
+	help
+	  If you say yes here you get support for the temperature
+	  sensor(s) inside your AMD K8 CPU.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called k8temp.
+
 config SENSORS_ASB100
 	tristate "Asus ASB100 Bach"
 	depends on HWMON && I2C && EXPERIMENTAL
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 31415843a91a..aab4c1063059 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_SENSORS_GL518SM)	+= gl518sm.o
 obj-$(CONFIG_SENSORS_GL520SM)	+= gl520sm.o
 obj-$(CONFIG_SENSORS_HDAPS)	+= hdaps.o
 obj-$(CONFIG_SENSORS_IT87)	+= it87.o
+obj-$(CONFIG_SENSORS_K8TEMP)	+= k8temp.o
 obj-$(CONFIG_SENSORS_LM63)	+= lm63.o
 obj-$(CONFIG_SENSORS_LM70)	+= lm70.o
 obj-$(CONFIG_SENSORS_LM75)	+= lm75.o
diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c
new file mode 100644
index 000000000000..50162ffa8832
--- /dev/null
+++ b/drivers/hwmon/k8temp.c
@@ -0,0 +1,292 @@
+/*
+ * k8temp.c - Linux kernel module for hardware monitoring
+ *
+ * Copyright (C) 2006 Rudolf Marek <r.marek@sh.cvut.cz>
+ *
+ * Inspired from the w83785 and amd756 drivers.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/pci.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+
+#define TEMP_FROM_REG(val)	(((((val) >> 16) & 0xff) - 49) * 1000)
+#define REG_TEMP	0xe4
+#define SEL_PLACE	0x40
+#define SEL_CORE	0x04
+
+struct k8temp_data {
+	struct class_device *class_dev;
+	struct mutex update_lock;
+	const char *name;
+	char valid;		/* zero until following fields are valid */
+	unsigned long last_updated;	/* in jiffies */
+
+	/* registers values */
+	u8 sensorsp;		/* sensor presence bits - SEL_CORE & SEL_PLACE */
+	u32 temp[2][2];		/* core, place */
+};
+
+static struct k8temp_data *k8temp_update_device(struct device *dev)
+{
+	struct k8temp_data *data = dev_get_drvdata(dev);
+	struct pci_dev *pdev = to_pci_dev(dev);
+	u8 tmp;
+
+	mutex_lock(&data->update_lock);
+
+	if (!data->valid
+	    || time_after(jiffies, data->last_updated + HZ)) {
+		pci_read_config_byte(pdev, REG_TEMP, &tmp);
+		tmp &= ~(SEL_PLACE | SEL_CORE);		/* Select sensor 0, core0 */
+		pci_write_config_byte(pdev, REG_TEMP, tmp);
+		pci_read_config_dword(pdev, REG_TEMP, &data->temp[0][0]);
+
+		if (data->sensorsp & SEL_PLACE) {
+			tmp |= SEL_PLACE;	/* Select sensor 1, core0 */
+			pci_write_config_byte(pdev, REG_TEMP, tmp);
+			pci_read_config_dword(pdev, REG_TEMP,
+					      &data->temp[0][1]);
+		}
+
+		if (data->sensorsp & SEL_CORE) {
+			tmp &= ~SEL_PLACE;	/* Select sensor 0, core1 */
+			tmp |= SEL_CORE;
+			pci_write_config_byte(pdev, REG_TEMP, tmp);
+			pci_read_config_dword(pdev, REG_TEMP,
+					      &data->temp[1][0]);
+
+			if (data->sensorsp & SEL_PLACE) {
+				tmp |= SEL_PLACE;	/* Select sensor 1, core1 */
+				pci_write_config_byte(pdev, REG_TEMP, tmp);
+				pci_read_config_dword(pdev, REG_TEMP,
+						      &data->temp[1][1]);
+			}
+		}
+
+		data->last_updated = jiffies;
+		data->valid = 1;
+	}
+
+	mutex_unlock(&data->update_lock);
+	return data;
+}
+
+/*
+ * Sysfs stuff
+ */
+
+static ssize_t show_name(struct device *dev, struct device_attribute
+			 *devattr, char *buf)
+{
+	struct k8temp_data *data = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", data->name);
+}
+
+
+static ssize_t show_temp(struct device *dev,
+			 struct device_attribute *devattr, char *buf)
+{
+	struct sensor_device_attribute_2 *attr =
+	    to_sensor_dev_attr_2(devattr);
+	int core = attr->nr;
+	int place = attr->index;
+	struct k8temp_data *data = k8temp_update_device(dev);
+
+	return sprintf(buf, "%d\n",
+		       TEMP_FROM_REG(data->temp[core][place]));
+}
+
+/* core, place */
+
+static SENSOR_DEVICE_ATTR_2(temp1_input, S_IRUGO, show_temp, NULL, 0, 0);
+static SENSOR_DEVICE_ATTR_2(temp2_input, S_IRUGO, show_temp, NULL, 0, 1);
+static SENSOR_DEVICE_ATTR_2(temp3_input, S_IRUGO, show_temp, NULL, 1, 0);
+static SENSOR_DEVICE_ATTR_2(temp4_input, S_IRUGO, show_temp, NULL, 1, 1);
+static DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
+
+static struct pci_device_id k8temp_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
+	{ 0 },
+};
+
+static int __devinit k8temp_probe(struct pci_dev *pdev,
+				  const struct pci_device_id *id)
+{
+	int err;
+	u8 scfg;
+	u32 temp;
+	struct k8temp_data *data;
+	u32 cpuid = cpuid_eax(1);
+
+	/* this feature should be available since SH-C0 core */
+	if ((cpuid == 0xf40) || (cpuid == 0xf50) || (cpuid == 0xf51)) {
+		err = -ENODEV;
+		goto exit;
+	}
+
+	if (!(data = kzalloc(sizeof(struct k8temp_data), GFP_KERNEL))) {
+		err = -ENOMEM;
+		goto exit;
+	}
+
+	pci_read_config_byte(pdev, REG_TEMP, &scfg);
+	scfg &= ~(SEL_PLACE | SEL_CORE);		/* Select sensor 0, core0 */
+	pci_write_config_byte(pdev, REG_TEMP, scfg);
+	pci_read_config_byte(pdev, REG_TEMP, &scfg);
+
+	if (scfg & (SEL_PLACE | SEL_CORE)) {
+		dev_err(&pdev->dev, "Configuration bit(s) stuck at 1!\n");
+		err = -ENODEV;
+		goto exit_free;
+	}
+
+	scfg |= (SEL_PLACE | SEL_CORE);
+	pci_write_config_byte(pdev, REG_TEMP, scfg);
+
+	/* now we know if we can change core and/or sensor */
+	pci_read_config_byte(pdev, REG_TEMP, &data->sensorsp);
+
+	if (data->sensorsp & SEL_PLACE) {
+		scfg &= ~SEL_CORE;	/* Select sensor 1, core0 */
+		pci_write_config_byte(pdev, REG_TEMP, scfg);
+		pci_read_config_dword(pdev, REG_TEMP, &temp);
+		scfg |= SEL_CORE;	/* prepare for next selection */
+		if (!((temp >> 16) & 0xff))	/* if temp is 0 -49C is not likely */
+			data->sensorsp &= ~SEL_PLACE;
+	}
+
+	if (data->sensorsp & SEL_CORE) {
+		scfg &= ~SEL_PLACE;	/* Select sensor 0, core1 */
+		pci_write_config_byte(pdev, REG_TEMP, scfg);
+		pci_read_config_dword(pdev, REG_TEMP, &temp);
+		if (!((temp >> 16) & 0xff))	/* if temp is 0 -49C is not likely */
+			data->sensorsp &= ~SEL_CORE;
+	}
+
+	data->name = "k8temp";
+	mutex_init(&data->update_lock);
+	dev_set_drvdata(&pdev->dev, data);
+
+	/* Register sysfs hooks */
+	err = device_create_file(&pdev->dev,
+			   &sensor_dev_attr_temp1_input.dev_attr);
+	if (err)
+		goto exit_remove;
+
+	/* sensor can be changed and reports something */
+	if (data->sensorsp & SEL_PLACE) {
+		err = device_create_file(&pdev->dev,
+				   &sensor_dev_attr_temp2_input.dev_attr);
+		if (err)
+			goto exit_remove;
+	}
+
+	/* core can be changed and reports something */
+	if (data->sensorsp & SEL_CORE) {
+		err = device_create_file(&pdev->dev,
+				   &sensor_dev_attr_temp3_input.dev_attr);
+		if (err)
+			goto exit_remove;
+		if (data->sensorsp & SEL_PLACE)
+			err = device_create_file(&pdev->dev,
+					   &sensor_dev_attr_temp4_input.
+					   dev_attr);
+			if (err)
+				goto exit_remove;
+	}
+
+	err = device_create_file(&pdev->dev, &dev_attr_name);
+	if (err)
+		goto exit_remove;
+
+	data->class_dev = hwmon_device_register(&pdev->dev);
+
+	if (IS_ERR(data->class_dev)) {
+		err = PTR_ERR(data->class_dev);
+		goto exit_remove;
+	}
+
+	return 0;
+
+exit_remove:
+	device_remove_file(&pdev->dev,
+			   &sensor_dev_attr_temp1_input.dev_attr);
+	device_remove_file(&pdev->dev,
+			   &sensor_dev_attr_temp2_input.dev_attr);
+	device_remove_file(&pdev->dev,
+			   &sensor_dev_attr_temp3_input.dev_attr);
+	device_remove_file(&pdev->dev,
+			   &sensor_dev_attr_temp4_input.dev_attr);
+	device_remove_file(&pdev->dev, &dev_attr_name);
+exit_free:
+	dev_set_drvdata(&pdev->dev, NULL);
+	kfree(data);
+exit:
+	return err;
+}
+
+static void __devexit k8temp_remove(struct pci_dev *pdev)
+{
+	struct k8temp_data *data = dev_get_drvdata(&pdev->dev);
+
+	hwmon_device_unregister(data->class_dev);
+	device_remove_file(&pdev->dev,
+			   &sensor_dev_attr_temp1_input.dev_attr);
+	device_remove_file(&pdev->dev,
+			   &sensor_dev_attr_temp2_input.dev_attr);
+	device_remove_file(&pdev->dev,
+			   &sensor_dev_attr_temp3_input.dev_attr);
+	device_remove_file(&pdev->dev,
+			   &sensor_dev_attr_temp4_input.dev_attr);
+	device_remove_file(&pdev->dev, &dev_attr_name);
+	dev_set_drvdata(&pdev->dev, NULL);
+	kfree(data);
+}
+
+static struct pci_driver k8temp_driver = {
+	.name = "k8temp",
+	.id_table = k8temp_ids,
+	.probe = k8temp_probe,
+	.remove = __devexit_p(k8temp_remove),
+};
+
+static int __init k8temp_init(void)
+{
+	return pci_register_driver(&k8temp_driver);
+}
+
+static void __exit k8temp_exit(void)
+{
+	pci_unregister_driver(&k8temp_driver);
+}
+
+MODULE_AUTHOR("Rudolf Marek <r.marek@sh.cvut.cz>");
+MODULE_DESCRIPTION("AMD K8 core temperature monitor");
+MODULE_LICENSE("GPL");
+
+module_init(k8temp_init)
+module_exit(k8temp_exit)
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index ab032ceafa84..61db1907f06f 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -479,6 +479,7 @@
 
 #define PCI_VENDOR_ID_AMD		0x1022
 #define PCI_DEVICE_ID_AMD_K8_NB		0x1100
+#define PCI_DEVICE_ID_AMD_K8_NB_MISC	0x1103
 #define PCI_DEVICE_ID_AMD_LANCE		0x2000
 #define PCI_DEVICE_ID_AMD_LANCE_HOME	0x2001
 #define PCI_DEVICE_ID_AMD_SCSI		0x2020
-- 
cgit v1.2.3


From e0318ebff4d96131bb3524308b845f642e64df81 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 26 Sep 2006 14:50:20 -0400
Subject: USB: fix autosuspend when CONFIG_PM isn't set

This patch (as791b) fixes things up to avoid compiler warnings or
errors when CONFIG_USB_SUSPEND or CONFIG_PM isn't set.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/driver.c | 42 +++++++++++++++++++++---------------------
 drivers/usb/core/hub.c    |  4 ++--
 drivers/usb/core/usb.c    |  4 ++--
 drivers/usb/core/usb.h    | 18 +++++++++++++++++-
 include/linux/usb.h       |  2 +-
 5 files changed, 43 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index ee18d187ca17..113e484c763e 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -303,11 +303,11 @@ int usb_driver_claim_interface(struct usb_driver *driver,
 	dev->driver = &driver->drvwrap.driver;
 	usb_set_intfdata(iface, priv);
 
-	mutex_lock_nested(&udev->pm_mutex, udev->level);
+	usb_pm_lock(udev);
 	iface->condition = USB_INTERFACE_BOUND;
 	mark_active(iface);
 	iface->pm_usage_cnt = !(driver->supports_autosuspend);
-	mutex_unlock(&udev->pm_mutex);
+	usb_pm_unlock(udev);
 
 	/* if interface was already added, bind now; else let
 	 * the future device_add() bind it, bypassing probe()
@@ -356,11 +356,11 @@ void usb_driver_release_interface(struct usb_driver *driver,
 	dev->driver = NULL;
 	usb_set_intfdata(iface, NULL);
 
-	mutex_lock_nested(&udev->pm_mutex, udev->level);
+	usb_pm_lock(udev);
 	iface->condition = USB_INTERFACE_UNBOUND;
 	mark_quiesced(iface);
 	iface->needs_remote_wakeup = 0;
-	mutex_unlock(&udev->pm_mutex);
+	usb_pm_unlock(udev);
 }
 EXPORT_SYMBOL(usb_driver_release_interface);
 
@@ -789,7 +789,7 @@ EXPORT_SYMBOL_GPL_FUTURE(usb_deregister);
 
 #ifdef CONFIG_PM
 
-/* Caller has locked udev->pm_mutex */
+/* Caller has locked udev's pm_mutex */
 static int suspend_device(struct usb_device *udev, pm_message_t msg)
 {
 	struct usb_device_driver	*udriver;
@@ -816,7 +816,7 @@ done:
 	return status;
 }
 
-/* Caller has locked udev->pm_mutex */
+/* Caller has locked udev's pm_mutex */
 static int resume_device(struct usb_device *udev)
 {
 	struct usb_device_driver	*udriver;
@@ -842,7 +842,7 @@ done:
 	return status;
 }
 
-/* Caller has locked intf's usb_device's pm_mutex */
+/* Caller has locked intf's usb_device's pm mutex */
 static int suspend_interface(struct usb_interface *intf, pm_message_t msg)
 {
 	struct usb_driver	*driver;
@@ -1064,7 +1064,7 @@ int usb_resume_both(struct usb_device *udev)
 	/* Propagate the resume up the tree, if necessary */
 	if (udev->state == USB_STATE_SUSPENDED) {
 		if (parent) {
-			mutex_lock_nested(&parent->pm_mutex, parent->level);
+			usb_pm_lock(parent);
 			parent->auto_pm = 1;
 			status = usb_resume_both(parent);
 		} else {
@@ -1079,7 +1079,7 @@ int usb_resume_both(struct usb_device *udev)
 		if (status == 0)
 			status = resume_device(udev);
 		if (parent)
-			mutex_unlock(&parent->pm_mutex);
+			usb_pm_unlock(parent);
 	} else {
 
 		/* Needed only for setting udev->dev.power.power_state.event
@@ -1129,12 +1129,12 @@ int usb_resume_both(struct usb_device *udev)
  */
 void usb_autosuspend_device(struct usb_device *udev, int dec_usage_cnt)
 {
-	mutex_lock_nested(&udev->pm_mutex, udev->level);
+	usb_pm_lock(udev);
 	udev->pm_usage_cnt -= dec_usage_cnt;
 	if (udev->pm_usage_cnt <= 0)
 		queue_delayed_work(ksuspend_usb_wq, &udev->autosuspend,
 				USB_AUTOSUSPEND_DELAY);
-	mutex_unlock(&udev->pm_mutex);
+	usb_pm_unlock(udev);
 	// dev_dbg(&udev->dev, "%s: cnt %d\n",
 	//		__FUNCTION__, udev->pm_usage_cnt);
 }
@@ -1168,13 +1168,13 @@ int usb_autoresume_device(struct usb_device *udev, int inc_usage_cnt)
 {
 	int	status;
 
-	mutex_lock_nested(&udev->pm_mutex, udev->level);
+	usb_pm_lock(udev);
 	udev->pm_usage_cnt += inc_usage_cnt;
 	udev->auto_pm = 1;
 	status = usb_resume_both(udev);
 	if (status != 0)
 		udev->pm_usage_cnt -= inc_usage_cnt;
-	mutex_unlock(&udev->pm_mutex);
+	usb_pm_unlock(udev);
 	// dev_dbg(&udev->dev, "%s: status %d cnt %d\n",
 	//		__FUNCTION__, status, udev->pm_usage_cnt);
 	return status;
@@ -1215,13 +1215,13 @@ void usb_autopm_put_interface(struct usb_interface *intf)
 {
 	struct usb_device	*udev = interface_to_usbdev(intf);
 
-	mutex_lock_nested(&udev->pm_mutex, udev->level);
+	usb_pm_lock(udev);
 	if (intf->condition != USB_INTERFACE_UNBOUND &&
 			--intf->pm_usage_cnt <= 0) {
 		queue_delayed_work(ksuspend_usb_wq, &udev->autosuspend,
 				USB_AUTOSUSPEND_DELAY);
 	}
-	mutex_unlock(&udev->pm_mutex);
+	usb_pm_unlock(udev);
 	// dev_dbg(&intf->dev, "%s: cnt %d\n",
 	//		__FUNCTION__, intf->pm_usage_cnt);
 }
@@ -1263,7 +1263,7 @@ int usb_autopm_get_interface(struct usb_interface *intf)
 	struct usb_device	*udev = interface_to_usbdev(intf);
 	int			status;
 
-	mutex_lock_nested(&udev->pm_mutex, udev->level);
+	usb_pm_lock(udev);
 	if (intf->condition == USB_INTERFACE_UNBOUND)
 		status = -ENODEV;
 	else {
@@ -1273,7 +1273,7 @@ int usb_autopm_get_interface(struct usb_interface *intf)
 		if (status != 0)
 			--intf->pm_usage_cnt;
 	}
-	mutex_unlock(&udev->pm_mutex);
+	usb_pm_unlock(udev);
 	// dev_dbg(&intf->dev, "%s: status %d cnt %d\n",
 	//		__FUNCTION__, status, intf->pm_usage_cnt);
 	return status;
@@ -1289,10 +1289,10 @@ static int usb_suspend(struct device *dev, pm_message_t message)
 	if (is_usb_device(dev)) {
 		struct usb_device *udev = to_usb_device(dev);
 
-		mutex_lock_nested(&udev->pm_mutex, udev->level);
+		usb_pm_lock(udev);
 		udev->auto_pm = 0;
 		status = usb_suspend_both(udev, message);
-		mutex_unlock(&udev->pm_mutex);
+		usb_pm_unlock(udev);
 	} else
 		status = 0;
 	return status;
@@ -1305,10 +1305,10 @@ static int usb_resume(struct device *dev)
 	if (is_usb_device(dev)) {
 		struct usb_device *udev = to_usb_device(dev);
 
-		mutex_lock_nested(&udev->pm_mutex, udev->level);
+		usb_pm_lock(udev);
 		udev->auto_pm = 0;
 		status = usb_resume_both(udev);
-		mutex_unlock(&udev->pm_mutex);
+		usb_pm_unlock(udev);
 
 		/* Rebind drivers that had no suspend method? */
 	} else
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 2a8cb3c2b19c..7676690a0386 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1779,7 +1779,7 @@ static int remote_wakeup(struct usb_device *udev)
 	 * to the parent hub! */
 
 	usb_lock_device(udev);
-	mutex_lock_nested(&udev->pm_mutex, udev->level);
+	usb_pm_lock(udev);
 	if (udev->state == USB_STATE_SUSPENDED) {
 		dev_dbg(&udev->dev, "usb %sresume\n", "wakeup-");
 		/* TRSMRCY = 10 msec */
@@ -1788,7 +1788,7 @@ static int remote_wakeup(struct usb_device *udev)
 		if (status == 0)
 			udev->dev.power.power_state.event = PM_EVENT_ON;
 	}
-	mutex_unlock(&udev->pm_mutex);
+	usb_pm_unlock(udev);
 
 	if (status == 0)
 		usb_autoresume_device(udev, 0);
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 239f8e5d247f..e4df9edf1bc0 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -214,10 +214,10 @@ static void usb_autosuspend_work(void *_udev)
 {
 	struct usb_device	*udev = _udev;
 
-	mutex_lock_nested(&udev->pm_mutex, udev->level);
+	usb_pm_lock(udev);
 	udev->auto_pm = 1;
 	usb_suspend_both(udev, PMSG_SUSPEND);
-	mutex_unlock(&udev->pm_mutex);
+	usb_pm_unlock(udev);
 }
 
 #else
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index fb6eb41c374f..f69df137ec0e 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -36,6 +36,16 @@ extern int usb_resume_both(struct usb_device *udev);
 extern int usb_port_suspend(struct usb_device *dev);
 extern int usb_port_resume(struct usb_device *dev);
 
+static inline void usb_pm_lock(struct usb_device *udev)
+{
+	mutex_lock_nested(&udev->pm_mutex, udev->level);
+}
+
+static inline void usb_pm_unlock(struct usb_device *udev)
+{
+	mutex_unlock(&udev->pm_mutex);
+}
+
 #else
 
 #define usb_suspend_both(udev, msg)	0
@@ -45,6 +55,8 @@ static inline int usb_resume_both(struct usb_device *udev)
 }
 #define usb_port_suspend(dev)		0
 #define usb_port_resume(dev)		0
+static inline void usb_pm_lock(struct usb_device *udev) {}
+static inline void usb_pm_unlock(struct usb_device *udev) {}
 
 #endif
 
@@ -58,7 +70,11 @@ extern int usb_autoresume_device(struct usb_device *udev, int inc_busy_cnt);
 #else
 
 #define usb_autosuspend_device(udev, dec_busy_cnt)	do {} while (0)
-#define usb_autoresume_device(udev, inc_busy_cnt)	0
+static inline int usb_autoresume_device(struct usb_device *udev,
+		int inc_busy_cnt)
+{
+	return 0;
+}
 
 #endif
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 0da15b0b02be..190cc1b78fe2 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -380,10 +380,10 @@ struct usb_device {
 	int maxchild;			/* Number of ports if hub */
 	struct usb_device *children[USB_MAXCHILDREN];
 
+	int pm_usage_cnt;		/* usage counter for autosuspend */
 #ifdef CONFIG_PM
 	struct work_struct autosuspend;	/* for delayed autosuspends */
 	struct mutex pm_mutex;		/* protects PM operations */
-	int pm_usage_cnt;		/* usage counter for autosuspend */
 
 	unsigned auto_pm:1;		/* autosuspend/resume in progress */
 	unsigned do_remote_wakeup:1;	/* remote wakeup should be enabled */
-- 
cgit v1.2.3


From 2a50f28c326d20ab4556be1b867ecddf6aefbb88 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Sep 2006 21:22:08 -0700
Subject: [ATALK]: endianness annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/appletalk/ipddp.c |  5 +--
 include/linux/atalk.h         | 40 +---------------------
 net/appletalk/ddp.c           | 79 +++++++++++++++----------------------------
 3 files changed, 30 insertions(+), 94 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c
index 7f7dd450226a..b98592a8bac8 100644
--- a/drivers/net/appletalk/ipddp.c
+++ b/drivers/net/appletalk/ipddp.c
@@ -145,9 +145,7 @@ static int ipddp_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* Create the Extended DDP header */
 	ddp = (struct ddpehdr *)skb->data;
-        ddp->deh_len = skb->len;
-        ddp->deh_hops = 1;
-        ddp->deh_pad = 0;
+        ddp->deh_len_hops = htons(skb->len + (1<<10));
         ddp->deh_sum = 0;
 
 	/*
@@ -170,7 +168,6 @@ static int ipddp_xmit(struct sk_buff *skb, struct net_device *dev)
         ddp->deh_sport = 72;
 
         *((__u8 *)(ddp+1)) = 22;        	/* ddp type = IP */
-        *((__u16 *)ddp)=ntohs(*((__u16 *)ddp));	/* fix up length field */
 
         skb->protocol = htons(ETH_P_ATALK);     /* Protocol has changed */
 
diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 6ba3aa8a81f4..75b8baca08f3 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -88,15 +88,7 @@ static inline struct atalk_sock *at_sk(struct sock *sk)
 #include <asm/byteorder.h>
 
 struct ddpehdr {
-#ifdef __LITTLE_ENDIAN_BITFIELD
-	__u16	deh_len:10,
-		deh_hops:4,
-		deh_pad:2;
-#else
-	__u16	deh_pad:2,
-		deh_hops:4,
-		deh_len:10;
-#endif
+	__be16	deh_len_hops;	/* lower 10 bits are length, next 4 - hops */
 	__be16	deh_sum;
 	__be16	deh_dnet;
 	__be16	deh_snet;
@@ -112,36 +104,6 @@ static __inline__ struct ddpehdr *ddp_hdr(struct sk_buff *skb)
 	return (struct ddpehdr *)skb->h.raw;
 }
 
-/*
- *	Don't drop the struct into the struct above.  You'll get some
- *	surprise padding.
- */
-struct ddpebits {
-#ifdef __LITTLE_ENDIAN_BITFIELD
-	__u16	deh_len:10,
-		deh_hops:4,
-		deh_pad:2;
-#else
-	__u16	deh_pad:2,
-		deh_hops:4,
-		deh_len:10;
-#endif
-};
-
-/* Short form header */
-struct ddpshdr {
-#ifdef __LITTLE_ENDIAN_BITFIELD
-	__u16	dsh_len:10,
-		dsh_pad:6;
-#else
-	__u16	dsh_pad:6,
-		dsh_len:10;
-#endif
-	__u8	dsh_dport;
-	__u8	dsh_sport;
-	/* And netatalk apps expect to stick the type in themselves */
-};
-
 /* AppleTalk AARP headers */
 struct elapaarp {
 	__be16	hw_type;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 96dc6bb52d14..708e2e0371af 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1002,7 +1002,7 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
 	return sum;
 }
 
-static unsigned short atalk_checksum(const struct sk_buff *skb, int len)
+static __be16 atalk_checksum(const struct sk_buff *skb, int len)
 {
 	unsigned long sum;
 
@@ -1010,7 +1010,7 @@ static unsigned short atalk_checksum(const struct sk_buff *skb, int len)
 	sum = atalk_sum_skb(skb, 4, len-4, 0);
 
 	/* Use 0xFFFF for 0. 0 itself means none */
-	return sum ? htons((unsigned short)sum) : 0xFFFF;
+	return sum ? htons((unsigned short)sum) : htons(0xFFFF);
 }
 
 static struct proto ddp_proto = {
@@ -1289,7 +1289,7 @@ static int handle_ip_over_ddp(struct sk_buff *skb)
 #endif
 
 static void atalk_route_packet(struct sk_buff *skb, struct net_device *dev,
-			       struct ddpehdr *ddp, struct ddpebits *ddphv,
+			       struct ddpehdr *ddp, __u16 len_hops,
 			       int origlen)
 {
 	struct atalk_route *rt;
@@ -1317,10 +1317,12 @@ static void atalk_route_packet(struct sk_buff *skb, struct net_device *dev,
 
 	/* Route the packet */
 	rt = atrtr_find(&ta);
-	if (!rt || ddphv->deh_hops == DDP_MAXHOPS)
+	/* increment hops count */
+	len_hops += 1 << 10;
+	if (!rt || !(len_hops & (15 << 10)))
 		goto free_it;
+
 	/* FIXME: use skb->cb to be able to use shared skbs */
-	ddphv->deh_hops++;
 
 	/*
 	 * Route goes through another gateway, so set the target to the
@@ -1335,11 +1337,10 @@ static void atalk_route_packet(struct sk_buff *skb, struct net_device *dev,
         /* Fix up skb->len field */
         skb_trim(skb, min_t(unsigned int, origlen,
 			    (rt->dev->hard_header_len +
-			     ddp_dl->header_length + ddphv->deh_len)));
+			     ddp_dl->header_length + (len_hops & 1023))));
 
-	/* Mend the byte order */
 	/* FIXME: use skb->cb to be able to use shared skbs */
-	*((__u16 *)ddp) = ntohs(*((__u16 *)ddphv));
+	ddp->deh_len_hops = htons(len_hops);
 
 	/*
 	 * Send the buffer onwards
@@ -1394,7 +1395,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
 	struct atalk_iface *atif;
 	struct sockaddr_at tosat;
         int origlen;
-        struct ddpebits ddphv;
+	__u16 len_hops;
 
 	/* Don't mangle buffer if shared */
 	if (!(skb = skb_share_check(skb, GFP_ATOMIC))) 
@@ -1406,16 +1407,11 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	ddp = ddp_hdr(skb);
 
-	/*
-	 *	Fix up the length field	[Ok this is horrible but otherwise
-	 *	I end up with unions of bit fields and messy bit field order
-	 *	compiler/endian dependencies..]
-	 */
-	*((__u16 *)&ddphv) = ntohs(*((__u16 *)ddp));
+	len_hops = ntohs(ddp->deh_len_hops);
 
 	/* Trim buffer in case of stray trailing data */
 	origlen = skb->len;
-	skb_trim(skb, min_t(unsigned int, skb->len, ddphv.deh_len));
+	skb_trim(skb, min_t(unsigned int, skb->len, len_hops & 1023));
 
 	/*
 	 * Size check to see if ddp->deh_len was crap
@@ -1430,7 +1426,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
 	 * valid for net byte orders all over the networking code...
 	 */
 	if (ddp->deh_sum &&
-	    atalk_checksum(skb, ddphv.deh_len) != ddp->deh_sum)
+	    atalk_checksum(skb, len_hops & 1023) != ddp->deh_sum)
 		/* Not a valid AppleTalk frame - dustbin time */
 		goto freeit;
 
@@ -1444,7 +1440,7 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		/* Not ours, so we route the packet via the correct
 		 * AppleTalk iface
 		 */
-		atalk_route_packet(skb, dev, ddp, &ddphv, origlen);
+		atalk_route_packet(skb, dev, ddp, len_hops, origlen);
 		goto out;
 	}
 
@@ -1489,7 +1485,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		/* Find our address */
 		struct atalk_addr *ap = atalk_find_dev_addr(dev);
 
-		if (!ap || skb->len < sizeof(struct ddpshdr))
+		if (!ap || skb->len < sizeof(__be16) || skb->len > 1023)
 			goto freeit;
 
 		/* Don't mangle buffer if shared */
@@ -1519,11 +1515,8 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		/*
 		 * Not sure about this bit...
 		 */
-		ddp->deh_len   = skb->len;
-		ddp->deh_hops  = DDP_MAXHOPS;	/* Non routable, so force a drop
-						   if we slip up later */
-		/* Mend the byte order */
-		*((__u16 *)ddp) = htons(*((__u16 *)ddp));
+		/* Non routable, so force a drop if we slip up later */
+		ddp->deh_len_hops = htons(skb->len + (DDP_MAXHOPS << 10));
 	}
 	skb->h.raw = skb->data;
 
@@ -1622,16 +1615,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
 	SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk);
 
 	ddp = (struct ddpehdr *)skb_put(skb, sizeof(struct ddpehdr));
-	ddp->deh_pad  = 0;
-	ddp->deh_hops = 0;
-	ddp->deh_len  = len + sizeof(*ddp);
-	/*
-	 * Fix up the length field [Ok this is horrible but otherwise
-	 * I end up with unions of bit fields and messy bit field order
-	 * compiler/endian dependencies..
-	 */
-	*((__u16 *)ddp) = ntohs(*((__u16 *)ddp));
-
+	ddp->deh_len_hops  = htons(len + sizeof(*ddp));
 	ddp->deh_dnet  = usat->sat_addr.s_net;
 	ddp->deh_snet  = at->src_net;
 	ddp->deh_dnode = usat->sat_addr.s_node;
@@ -1712,8 +1696,8 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
 	struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name;
 	struct ddpehdr *ddp;
 	int copied = 0;
+	int offset = 0;
 	int err = 0;
-        struct ddpebits ddphv;
 	struct sk_buff *skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
 						flags & MSG_DONTWAIT, &err);
 	if (!skb)
@@ -1721,25 +1705,18 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
 
 	/* FIXME: use skb->cb to be able to use shared skbs */
 	ddp = ddp_hdr(skb);
-	*((__u16 *)&ddphv) = ntohs(*((__u16 *)ddp));
+	copied = ntohs(ddp->deh_len_hops) & 1023;
 
-	if (sk->sk_type == SOCK_RAW) {
-		copied = ddphv.deh_len;
-		if (copied > size) {
-			copied = size;
-			msg->msg_flags |= MSG_TRUNC;
-		}
+	if (sk->sk_type != SOCK_RAW) {
+		offset = sizeof(*ddp);
+		copied -= offset;
+	}
 
-		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
-	} else {
-		copied = ddphv.deh_len - sizeof(*ddp);
-		if (copied > size) {
-			copied = size;
-			msg->msg_flags |= MSG_TRUNC;
-		}
-		err = skb_copy_datagram_iovec(skb, sizeof(*ddp),
-					      msg->msg_iov, copied);
+	if (copied > size) {
+		copied = size;
+		msg->msg_flags |= MSG_TRUNC;
 	}
+	err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied);
 
 	if (!err) {
 		if (sat) {
-- 
cgit v1.2.3


From 0ac0760a57a6b1eb75c21a590e578be5dfc2f88b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Sep 2006 21:23:16 -0700
Subject: [TR]: endiannness annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/trdevice.h |  2 +-
 net/802/tr.c             | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trdevice.h b/include/linux/trdevice.h
index 99e02ef54c47..bfc84a7aecc5 100644
--- a/include/linux/trdevice.h
+++ b/include/linux/trdevice.h
@@ -28,7 +28,7 @@
 #include <linux/if_tr.h>
 
 #ifdef __KERNEL__
-extern unsigned short	tr_type_trans(struct sk_buff *skb, struct net_device *dev);
+extern __be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev);
 extern void tr_source_route(struct sk_buff *skb, struct trh_hdr *trh, struct net_device *dev);
 extern struct net_device *alloc_trdev(int sizeof_priv);
 
diff --git a/net/802/tr.c b/net/802/tr.c
index d7d8f40c4fed..829deb41ce81 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -164,7 +164,7 @@ static int tr_rebuild_header(struct sk_buff *skb)
 	 */
 	 
 	if(trllc->ethertype != htons(ETH_P_IP)) {
-		printk("tr_rebuild_header: Don't know how to resolve type %04X addresses ?\n",(unsigned int)htons(trllc->ethertype));
+		printk("tr_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(trllc->ethertype));
 		return 0;
 	}
 
@@ -186,7 +186,7 @@ static int tr_rebuild_header(struct sk_buff *skb)
  *	it via SNAP.
  */
  
-unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev) 
+__be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 
 	struct trh_hdr *trh=(struct trh_hdr *)skb->data;
@@ -229,15 +229,15 @@ unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev)
 	 */
 
 	if (trllc->dsap == EXTENDED_SAP &&
-	    (trllc->ethertype == ntohs(ETH_P_IP) ||
-	     trllc->ethertype == ntohs(ETH_P_IPV6) ||
-	     trllc->ethertype == ntohs(ETH_P_ARP)))
+	    (trllc->ethertype == htons(ETH_P_IP) ||
+	     trllc->ethertype == htons(ETH_P_IPV6) ||
+	     trllc->ethertype == htons(ETH_P_ARP)))
 	{
 		skb_pull(skb, sizeof(struct trllc));
 		return trllc->ethertype;
 	}
 
-	return ntohs(ETH_P_TR_802_2);
+	return htons(ETH_P_TR_802_2);
 }
 
 /*
-- 
cgit v1.2.3


From 046d033148e6936ee2466d38214cf0743a210f39 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Sep 2006 21:24:24 -0700
Subject: [IPV4]: headers endianness

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ip.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ip.h b/include/linux/ip.h
index 2f4600146f83..6b25d36fc54c 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -96,7 +96,7 @@ struct iphdr {
 	__be16	frag_off;
 	__u8	ttl;
 	__u8	protocol;
-	__u16	check;
+	__be16	check;
 	__be32	saddr;
 	__be32	daddr;
 	/*The options start here. */
@@ -105,22 +105,22 @@ struct iphdr {
 struct ip_auth_hdr {
 	__u8  nexthdr;
 	__u8  hdrlen;		/* This one is measured in 32 bit units! */
-	__u16 reserved;
-	__u32 spi;
-	__u32 seq_no;		/* Sequence number */
+	__be16 reserved;
+	__be32 spi;
+	__be32 seq_no;		/* Sequence number */
 	__u8  auth_data[0];	/* Variable len but >=4. Mind the 64 bit alignment! */
 };
 
 struct ip_esp_hdr {
-	__u32 spi;
-	__u32 seq_no;		/* Sequence number */
+	__be32 spi;
+	__be32 seq_no;		/* Sequence number */
 	__u8  enc_data[0];	/* Variable len but >=8. Mind the 64 bit alignment! */
 };
 
 struct ip_comp_hdr {
 	__u8 nexthdr;
 	__u8 flags;
-	__u16 cpi;
+	__be16 cpi;
 };
 
 #endif	/* _LINUX_IP_H */
-- 
cgit v1.2.3


From a61ced5d1c2e773620d7855ea2009d770c10a6e6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Sep 2006 21:27:54 -0700
Subject: [IPV4]: inet_select_addr() annotations

argument and return value are net-endian.  Annotated function and inferred
net-endian variables in callers.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h          | 2 +-
 net/ipv4/arp.c                      | 4 ++--
 net/ipv4/devinet.c                  | 4 ++--
 net/ipv4/icmp.c                     | 2 +-
 net/ipv4/ipvs/ip_vs_sync.c          | 2 +-
 net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +-
 net/ipv4/route.c                    | 4 ++--
 7 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 92297ff24e85..40d07d0b896b 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -110,7 +110,7 @@ extern int		devinet_ioctl(unsigned int cmd, void __user *);
 extern void		devinet_init(void);
 extern struct in_device *inetdev_init(struct net_device *dev);
 extern struct in_device	*inetdev_by_index(int);
-extern u32		inet_select_addr(const struct net_device *dev, u32 dst, int scope);
+extern __be32		inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
 extern u32		inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope);
 extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask);
 extern void		inet_forward_change(void);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 48e1ccb2ff55..db72339c21e1 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -330,10 +330,10 @@ static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb)
 
 static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 {
-	u32 saddr = 0;
+	__be32 saddr = 0;
 	u8  *dst_ha = NULL;
 	struct net_device *dev = neigh->dev;
-	u32 target = *(u32*)neigh->primary_key;
+	__be32 target = *(__be32*)neigh->primary_key;
 	int probes = atomic_read(&neigh->probes);
 	struct in_device *in_dev = in_dev_get(dev);
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 8e8d1f17d77a..bffbbecef455 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -876,9 +876,9 @@ out:
 	return done;
 }
 
-u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope)
+__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
 {
-	u32 addr = 0;
+	__be32 addr = 0;
 	struct in_device *in_dev;
 
 	rcu_read_lock();
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c2ad07e48ab4..fd39685241c1 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -437,7 +437,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info)
 	struct icmp_bxm icmp_param;
 	struct rtable *rt = (struct rtable *)skb_in->dst;
 	struct ipcm_cookie ipc;
-	u32 saddr;
+	__be32 saddr;
 	u8  tos;
 
 	if (!rt)
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 1bca714bda3d..0c8d20da6139 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -464,7 +464,7 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
 static int bind_mcastif_addr(struct socket *sock, char *ifname)
 {
 	struct net_device *dev;
-	u32 addr;
+	__be32 addr;
 	struct sockaddr_in sin;
 
 	if ((dev = __dev_get_by_name(ifname)) == NULL)
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index bc65168a3437..3dbfcfac8a84 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -70,7 +70,7 @@ masquerade_target(struct sk_buff **pskb,
 	const struct ip_nat_multi_range_compat *mr;
 	struct ip_nat_range newrange;
 	struct rtable *rt;
-	u_int32_t newsrc;
+	__be32 newsrc;
 
 	IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 32fcb77295f0..9d8fbf1e5bc3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1532,7 +1532,7 @@ static int ip_rt_bug(struct sk_buff *skb)
 
 void ip_rt_get_source(u8 *addr, struct rtable *rt)
 {
-	u32 src;
+	__be32 src;
 	struct fib_result res;
 
 	if (rt->fl.iif == 0)
@@ -1603,7 +1603,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 {
 	unsigned hash;
 	struct rtable *rth;
-	u32 spec_dst;
+	__be32 spec_dst;
 	struct in_device *in_dev = in_dev_get(dev);
 	u32 itag = 0;
 
-- 
cgit v1.2.3


From a60c4923da795c74db9ff61a60e2f1df5754e4ce Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Sep 2006 21:28:34 -0700
Subject: [IPV4]: ip_check_mc() annotations

annotated arguments

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h | 2 +-
 net/ipv4/igmp.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 899c3d4776f3..8e7eedb3a574 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -197,7 +197,7 @@ struct ip_mc_list
 #define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value)
 #define IGMPV3_MRC(value) IGMPV3_EXP(0x80, 4, 3, value)
 
-extern int ip_check_mc(struct in_device *dev, u32 mc_addr, u32 src_addr, u16 proto);
+extern int ip_check_mc(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto);
 extern int igmp_rcv(struct sk_buff *);
 extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr);
 extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 58be8227b0cb..d8068c483781 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2216,7 +2216,7 @@ void ip_mc_drop_socket(struct sock *sk)
 	rtnl_unlock();
 }
 
-int ip_check_mc(struct in_device *in_dev, u32 mc_addr, u32 src_addr, u16 proto)
+int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto)
 {
 	struct ip_mc_list *im;
 	struct ip_sf_list *psf;
-- 
cgit v1.2.3


From ff428d72c59b35e4ba34bc1b487e707648010fe3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Sep 2006 22:13:35 -0700
Subject: [IPV4]: inet_addr_onlink() annotated

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h | 2 +-
 net/ipv4/devinet.c         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 40d07d0b896b..5ae09372c144 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -105,7 +105,7 @@ extern int register_inetaddr_notifier(struct notifier_block *nb);
 extern int unregister_inetaddr_notifier(struct notifier_block *nb);
 
 extern struct net_device 	*ip_dev_find(u32 addr);
-extern int		inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b);
+extern int		inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
 extern int		devinet_ioctl(unsigned int cmd, void __user *);
 extern void		devinet_init(void);
 extern struct in_device *inetdev_init(struct net_device *dev);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index bffbbecef455..5988584f6a6d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -224,7 +224,7 @@ static void inetdev_destroy(struct in_device *in_dev)
 	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
 }
 
-int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b)
+int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 {
 	rcu_read_lock();
 	for_primary_ifa(in_dev) {
-- 
cgit v1.2.3


From a144ea4b7a13087081ab5402fa9ad0bcfd249e67 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 28 Sep 2006 18:00:55 -0700
Subject: [IPV4]: annotate struct in_ifaddr

ifa_local, ifa_address, ifa_mask, ifa_broadcast and ifa_anycast are
net-endian.  Annotated them and variables that are inferred to be
net-endian.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/ia64/hp/sim/simeth.c                    |  4 ++--
 arch/um/drivers/net_kern.c                   |  2 +-
 arch/xtensa/platform-iss/network.c           |  2 +-
 drivers/isdn/i4l/isdn_net.c                  |  4 ++--
 drivers/net/bonding/bond_main.c              |  2 +-
 drivers/net/wan/hdlc_cisco.c                 |  2 +-
 drivers/net/wan/syncppp.c                    |  2 +-
 drivers/net/wireless/strip.c                 |  4 ++--
 include/linux/inetdevice.h                   | 10 +++++-----
 net/ipv4/devinet.c                           |  4 ++--
 net/ipv4/fib_frontend.c                      | 12 ++++++------
 net/ipv4/icmp.c                              |  2 +-
 net/ipv4/netfilter/ip_conntrack_netbios_ns.c |  2 +-
 net/ipv4/netfilter/ipt_REDIRECT.c            |  2 +-
 14 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c
index b5195be62818..e1a1b11473e2 100644
--- a/arch/ia64/hp/sim/simeth.c
+++ b/arch/ia64/hp/sim/simeth.c
@@ -320,7 +320,7 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr)
 	}
 
 	printk(KERN_INFO "simeth_device_event: %s ipaddr=0x%x\n",
-	       dev->name, htonl(ifa->ifa_local));
+	       dev->name, ntohl(ifa->ifa_local));
 
 	/*
 	 * XXX Fix me
@@ -331,7 +331,7 @@ simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr)
 	local = dev->priv;
 	/* now do it for real */
 	r = event == NETDEV_UP ?
-		netdev_attach(local->simfd, dev->irq, htonl(ifa->ifa_local)):
+		netdev_attach(local->simfd, dev->irq, ntohl(ifa->ifa_local)):
 		netdev_detach(local->simfd);
 
 	printk(KERN_INFO "simeth: netdev_attach/detach: event=%s ->%d\n",
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 664c2e2fb820..bd1178fa4e9a 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -825,7 +825,7 @@ int dev_netmask(void *d, void *m)
 	struct net_device *dev = d;
 	struct in_device *ip = dev->ip_ptr;
 	struct in_ifaddr *in;
-	__u32 *mask_out = m;
+	__be32 *mask_out = m;
 
 	if(ip == NULL) 
 		return(1);
diff --git a/arch/xtensa/platform-iss/network.c b/arch/xtensa/platform-iss/network.c
index d96164e602fe..15d64414bd60 100644
--- a/arch/xtensa/platform-iss/network.c
+++ b/arch/xtensa/platform-iss/network.c
@@ -201,7 +201,7 @@ static void dev_ip_addr(void *d, char *buf, char *bin_buf)
 	struct net_device *dev = d;
 	struct in_device *ip = dev->ip_ptr;
 	struct in_ifaddr *in;
-	u32 addr;
+	__be32 addr;
 
 	if ((ip == NULL) || ((in = ip->ifa_list) == NULL)) {
 		printk(KERN_WARNING "Device not assigned an IP address!\n");
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index 43da8ae1b2ad..1f8d6ae66b41 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -1614,8 +1614,8 @@ isdn_net_ciscohdlck_slarp_send_reply(isdn_net_local *lp)
 	struct sk_buff *skb;
 	unsigned char *p;
 	struct in_device *in_dev = NULL;
-	u32 addr = 0;		/* local ipv4 address */
-	u32 mask = 0;		/* local netmask */
+	__be32 addr = 0;		/* local ipv4 address */
+	__be32 mask = 0;		/* local netmask */
 
 	if ((in_dev = lp->netdev->dev.ip_ptr) != NULL) {
 		/* take primary(first) address of interface */
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 0fb5f653d3ce..c0bbddae4ec4 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2252,7 +2252,7 @@ static u32 bond_glean_dev_ip(struct net_device *dev)
 {
 	struct in_device *idev;
 	struct in_ifaddr *ifa;
-	u32 addr = 0;
+	__be32 addr = 0;
 
 	if (!dev)
 		return 0;
diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index 7ec2b2f9b7ee..b0bc5ddcf1b1 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -161,7 +161,7 @@ static int cisco_rx(struct sk_buff *skb)
 	struct hdlc_header *data = (struct hdlc_header*)skb->data;
 	struct cisco_packet *cisco_data;
 	struct in_device *in_dev;
-	u32 addr, mask;
+	__be32 addr, mask;
 
 	if (skb->len < sizeof(struct hdlc_header))
 		goto rx_error;
diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c
index c13b459a0137..d1173089f334 100644
--- a/drivers/net/wan/syncppp.c
+++ b/drivers/net/wan/syncppp.c
@@ -763,7 +763,7 @@ static void sppp_cisco_input (struct sppp *sp, struct sk_buff *skb)
 		{
 		struct in_device *in_dev;
 		struct in_ifaddr *ifa;
-		u32 addr = 0, mask = ~0; /* FIXME: is the mask correct? */
+		__be32 addr = 0, mask = ~0; /* FIXME: is the mask correct? */
 #ifdef CONFIG_INET
 		rcu_read_lock();
 		if ((in_dev = __in_dev_get_rcu(dev)) != NULL)
diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c
index ccaf28e8db0a..337c692f6fd6 100644
--- a/drivers/net/wireless/strip.c
+++ b/drivers/net/wireless/strip.c
@@ -1342,7 +1342,7 @@ static unsigned char *strip_make_packet(unsigned char *buffer,
 	 * 'broadcast hub' radio (First byte of address being 0xFF means broadcast)
 	 */
 	if (haddr.c[0] == 0xFF) {
-		u32 brd = 0;
+		__be32 brd = 0;
 		struct in_device *in_dev;
 
 		rcu_read_lock();
@@ -1406,7 +1406,7 @@ static void strip_send(struct strip *strip_info, struct sk_buff *skb)
 	int doreset = (long) jiffies - strip_info->watchdog_doreset >= 0;
 	int doprobe = (long) jiffies - strip_info->watchdog_doprobe >= 0
 	    && !doreset;
-	u32 addr, brd;
+	__be32 addr, brd;
 
 	/*
 	 * 1. If we have a packet, encapsulate it and put it in the buffer
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 5ae09372c144..54b32e8b8f65 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -90,11 +90,11 @@ struct in_ifaddr
 	struct in_ifaddr	*ifa_next;
 	struct in_device	*ifa_dev;
 	struct rcu_head		rcu_head;
-	u32			ifa_local;
-	u32			ifa_address;
-	u32			ifa_mask;
-	u32			ifa_broadcast;
-	u32			ifa_anycast;
+	__be32			ifa_local;
+	__be32			ifa_address;
+	__be32			ifa_mask;
+	__be32			ifa_broadcast;
+	__be32			ifa_anycast;
 	unsigned char		ifa_scope;
 	unsigned char		ifa_flags;
 	unsigned char		ifa_prefixlen;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5988584f6a6d..a0a7780e7515 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -805,7 +805,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
 			break;
 		ret = 0;
 		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
-			u32 old_mask = ifa->ifa_mask;
+			__be32 old_mask = ifa->ifa_mask;
 			inet_del_ifa(in_dev, ifap, 0);
 			ifa->ifa_mask = sin->sin_addr.s_addr;
 			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
@@ -931,7 +931,7 @@ static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst,
 			      u32 local, int scope)
 {
 	int same = 0;
-	u32 addr = 0;
+	__be32 addr = 0;
 
 	for_ifa(in_dev) {
 		if (!addr &&
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7f5217907e5a..62ee71ee6bc9 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -667,9 +667,9 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
 	struct in_device *in_dev = ifa->ifa_dev;
 	struct net_device *dev = in_dev->dev;
 	struct in_ifaddr *prim = ifa;
-	u32 mask = ifa->ifa_mask;
-	u32 addr = ifa->ifa_local;
-	u32 prefix = ifa->ifa_address&mask;
+	__be32 mask = ifa->ifa_mask;
+	__be32 addr = ifa->ifa_local;
+	__be32 prefix = ifa->ifa_address&mask;
 
 	if (ifa->ifa_flags&IFA_F_SECONDARY) {
 		prim = inet_ifa_byprefix(in_dev, prefix, mask);
@@ -685,7 +685,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
 		return;
 
 	/* Add broadcast address, if it is explicitly assigned. */
-	if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
+	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
 		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
 
 	if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
@@ -707,8 +707,8 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
 	struct net_device *dev = in_dev->dev;
 	struct in_ifaddr *ifa1;
 	struct in_ifaddr *prim = ifa;
-	u32 brd = ifa->ifa_address|~ifa->ifa_mask;
-	u32 any = ifa->ifa_address&ifa->ifa_mask;
+	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
+	__be32 any = ifa->ifa_address&ifa->ifa_mask;
 #define LOCAL_OK	1
 #define BRD_OK		2
 #define BRD0_OK		4
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fd39685241c1..428f1c91ec45 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -895,7 +895,7 @@ static void icmp_address_reply(struct sk_buff *skb)
 	if (in_dev->ifa_list &&
 	    IN_DEV_LOG_MARTIANS(in_dev) &&
 	    IN_DEV_FORWARD(in_dev)) {
-		u32 _mask, *mp;
+		__be32 _mask, *mp;
 
 		mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask);
 		BUG_ON(mp == NULL);
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index 3d0b438783db..4adec47aae32 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -48,7 +48,7 @@ static int help(struct sk_buff **pskb,
 	struct iphdr *iph = (*pskb)->nh.iph;
 	struct rtable *rt = (struct rtable *)(*pskb)->dst;
 	struct in_device *in_dev;
-	u_int32_t mask = 0;
+	__be32 mask = 0;
 
 	/* we're only interested in locally generated packets */
 	if ((*pskb)->sk == NULL)
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index f03d43671c6d..c0dcfe9d610c 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -61,7 +61,7 @@ redirect_target(struct sk_buff **pskb,
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
-	u_int32_t newdst;
+	__be32 newdst;
 	const struct ip_nat_multi_range_compat *mr = targinfo;
 	struct ip_nat_range newrange;
 
-- 
cgit v1.2.3


From 60cad5da5791ceb0beefe9a79b570cca45791f50 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 26 Sep 2006 22:17:09 -0700
Subject: [IPV4]: annotate inetdev.h helpers

inet_confirm_addr(), inet_ifa_byprefix(), ip_dev_find(), inet_make_mask() and
inet_ifa_match() annotated, along with inferred net-endian variables

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/core/addr.c |  4 ++--
 include/linux/inetdevice.h     | 10 +++++-----
 net/ipv4/devinet.c             | 12 ++++++------
 net/ipv4/fib_frontend.c        |  2 +-
 4 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 9cbf09e2052f..60d3fbdd216c 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -86,7 +86,7 @@ EXPORT_SYMBOL(rdma_copy_addr);
 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 {
 	struct net_device *dev;
-	u32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
+	__be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
 	int ret;
 
 	dev = ip_dev_find(ip);
@@ -239,7 +239,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in,
 {
 	struct net_device *dev;
 	u32 src_ip = src_in->sin_addr.s_addr;
-	u32 dst_ip = dst_in->sin_addr.s_addr;
+	__be32 dst_ip = dst_in->sin_addr.s_addr;
 	int ret;
 
 	dev = ip_dev_find(dst_ip);
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 54b32e8b8f65..5a0ab04627bc 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -104,18 +104,18 @@ struct in_ifaddr
 extern int register_inetaddr_notifier(struct notifier_block *nb);
 extern int unregister_inetaddr_notifier(struct notifier_block *nb);
 
-extern struct net_device 	*ip_dev_find(u32 addr);
+extern struct net_device 	*ip_dev_find(__be32 addr);
 extern int		inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
 extern int		devinet_ioctl(unsigned int cmd, void __user *);
 extern void		devinet_init(void);
 extern struct in_device *inetdev_init(struct net_device *dev);
 extern struct in_device	*inetdev_by_index(int);
 extern __be32		inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
-extern u32		inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope);
-extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, u32 mask);
+extern __be32		inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope);
+extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask);
 extern void		inet_forward_change(void);
 
-static __inline__ int inet_ifa_match(u32 addr, struct in_ifaddr *ifa)
+static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa)
 {
 	return !((addr^ifa->ifa_address)&ifa->ifa_mask);
 }
@@ -183,7 +183,7 @@ static inline void in_dev_put(struct in_device *idev)
 
 #endif /* __KERNEL__ */
 
-static __inline__ __u32 inet_make_mask(int logmask)
+static __inline__ __be32 inet_make_mask(int logmask)
 {
 	if (logmask)
 		return htonl(~((1<<(32-logmask))-1));
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index afba56222fb5..7602c79a389b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -429,8 +429,8 @@ struct in_device *inetdev_by_index(int ifindex)
 
 /* Called only from RTNL semaphored context. No locks. */
 
-struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix,
-				    u32 mask)
+struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
+				    __be32 mask)
 {
 	ASSERT_RTNL();
 
@@ -927,8 +927,8 @@ out:
 	return addr;
 }
 
-static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst,
-			      u32 local, int scope)
+static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
+			      __be32 local, int scope)
 {
 	int same = 0;
 	__be32 addr = 0;
@@ -971,9 +971,9 @@ static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst,
  * - local: address, 0=autoselect the local address
  * - scope: maximum allowed scope value for the local address
  */
-u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope)
+__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
 {
-	u32 addr = 0;
+	__be32 addr = 0;
 	struct in_device *in_dev;
 
 	if (dev) {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 62ee71ee6bc9..c0bd2f122da2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -122,7 +122,7 @@ static void fib_flush(void)
  *	Find the first device with a given source address.
  */
 
-struct net_device * ip_dev_find(u32 addr)
+struct net_device * ip_dev_find(__be32 addr)
 {
 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
 	struct fib_result res;
-- 
cgit v1.2.3


From 7699431301b189fca7ccbb64fe54e5a5170f8497 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 26 Sep 2006 22:28:46 -0700
Subject: [SUNRPC]: svc_{get,put}nl()

* add svc_getnl():
	Take network-endian value from buffer, convert to host-endian
	and return it.
* add svc_putnl():
	Take host-endian value, convert to network-endian and put it
	into a buffer.
* annotate svc_getu32()/svc_putu32() as dealing with network-endian.
* convert to svc_getnl(), svc_putnl().

[AV: in large part it's a carved-up Alexey's patch]

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sunrpc/svc.h        | 31 +++++++++++++++++------
 net/sunrpc/auth_gss/svcauth_gss.c | 52 +++++++++++++++++++--------------------
 net/sunrpc/svc.c                  | 44 ++++++++++++++++-----------------
 net/sunrpc/svcauth.c              |  2 +-
 net/sunrpc/svcauth_unix.c         | 22 ++++++++---------
 5 files changed, 84 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 7b27c09b5604..5df1d319f5d5 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -78,28 +78,45 @@ struct svc_serv {
  */
 #define RPCSVC_MAXPAGES		((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE + 2)
 
-static inline u32 svc_getu32(struct kvec *iov)
+static inline u32 svc_getnl(struct kvec *iov)
 {
-	u32 val, *vp;
+	__be32 val, *vp;
 	vp = iov->iov_base;
 	val = *vp++;
 	iov->iov_base = (void*)vp;
-	iov->iov_len -= sizeof(u32);
+	iov->iov_len -= sizeof(__be32);
+	return ntohl(val);
+}
+
+static inline void svc_putnl(struct kvec *iov, u32 val)
+{
+	__be32 *vp = iov->iov_base + iov->iov_len;
+	*vp = htonl(val);
+	iov->iov_len += sizeof(__be32);
+}
+
+static inline __be32 svc_getu32(struct kvec *iov)
+{
+	__be32 val, *vp;
+	vp = iov->iov_base;
+	val = *vp++;
+	iov->iov_base = (void*)vp;
+	iov->iov_len -= sizeof(__be32);
 	return val;
 }
 
 static inline void svc_ungetu32(struct kvec *iov)
 {
-	u32 *vp = (u32 *)iov->iov_base;
+	__be32 *vp = (__be32 *)iov->iov_base;
 	iov->iov_base = (void *)(vp - 1);
 	iov->iov_len += sizeof(*vp);
 }
 
-static inline void svc_putu32(struct kvec *iov, u32 val)
+static inline void svc_putu32(struct kvec *iov, __be32 val)
 {
-	u32 *vp = iov->iov_base + iov->iov_len;
+	__be32 *vp = iov->iov_base + iov->iov_len;
 	*vp = val;
-	iov->iov_len += sizeof(u32);
+	iov->iov_len += sizeof(__be32);
 }
 
 	
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 94217ec9e2dd..cd3d77ca3a31 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -607,7 +607,7 @@ svc_safe_getnetobj(struct kvec *argv, struct xdr_netobj *o)
 
 	if (argv->iov_len < 4)
 		return -1;
-	o->len = ntohl(svc_getu32(argv));
+	o->len = svc_getnl(argv);
 	l = round_up_to_quad(o->len);
 	if (argv->iov_len < l)
 		return -1;
@@ -624,7 +624,7 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o)
 
 	if (resv->iov_len + 4 > PAGE_SIZE)
 		return -1;
-	svc_putu32(resv, htonl(o->len));
+	svc_putnl(resv, o->len);
 	p = resv->iov_base + resv->iov_len;
 	resv->iov_len += round_up_to_quad(o->len);
 	if (resv->iov_len > PAGE_SIZE)
@@ -657,7 +657,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
 	*authp = rpc_autherr_badverf;
 	if (argv->iov_len < 4)
 		return SVC_DENIED;
-	flavor = ntohl(svc_getu32(argv));
+	flavor = svc_getnl(argv);
 	if (flavor != RPC_AUTH_GSS)
 		return SVC_DENIED;
 	if (svc_safe_getnetobj(argv, &checksum))
@@ -689,7 +689,7 @@ gss_write_null_verf(struct svc_rqst *rqstp)
 {
 	u32     *p;
 
-	svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_NULL));
+	svc_putnl(rqstp->rq_res.head, RPC_AUTH_NULL);
 	p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len;
 	/* don't really need to check if head->iov_len > PAGE_SIZE ... */
 	*p++ = 0;
@@ -708,7 +708,7 @@ gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
 	u32			*p;
 	struct kvec		iov;
 
-	svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_GSS));
+	svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS);
 	xdr_seq = htonl(seq);
 
 	iov.iov_base = &xdr_seq;
@@ -805,7 +805,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
 	struct xdr_netobj mic;
 	struct xdr_buf integ_buf;
 
-	integ_len = ntohl(svc_getu32(&buf->head[0]));
+	integ_len = svc_getnl(&buf->head[0]);
 	if (integ_len & 3)
 		goto out;
 	if (integ_len > buf->len)
@@ -825,7 +825,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
 	maj_stat = gss_verify_mic(ctx, &integ_buf, &mic);
 	if (maj_stat != GSS_S_COMPLETE)
 		goto out;
-	if (ntohl(svc_getu32(&buf->head[0])) != seq)
+	if (svc_getnl(&buf->head[0]) != seq)
 		goto out;
 	stat = 0;
 out:
@@ -857,7 +857,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
 
 	rqstp->rq_sendfile_ok = 0;
 
-	priv_len = ntohl(svc_getu32(&buf->head[0]));
+	priv_len = svc_getnl(&buf->head[0]);
 	if (rqstp->rq_deferred) {
 		/* Already decrypted last time through! The sequence number
 		 * check at out_seq is unnecessary but harmless: */
@@ -895,7 +895,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
 	if (maj_stat != GSS_S_COMPLETE)
 		return -EINVAL;
 out_seq:
-	if (ntohl(svc_getu32(&buf->head[0])) != seq)
+	if (svc_getnl(&buf->head[0]) != seq)
 		return -EINVAL;
 	return 0;
 }
@@ -985,12 +985,12 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
 
 	if (argv->iov_len < 5 * 4)
 		goto auth_err;
-	crlen = ntohl(svc_getu32(argv));
-	if (ntohl(svc_getu32(argv)) != RPC_GSS_VERSION)
+	crlen = svc_getnl(argv);
+	if (svc_getnl(argv) != RPC_GSS_VERSION)
 		goto auth_err;
-	gc->gc_proc = ntohl(svc_getu32(argv));
-	gc->gc_seq = ntohl(svc_getu32(argv));
-	gc->gc_svc = ntohl(svc_getu32(argv));
+	gc->gc_proc = svc_getnl(argv);
+	gc->gc_seq = svc_getnl(argv);
+	gc->gc_svc = svc_getnl(argv);
 	if (svc_safe_getnetobj(argv, &gc->gc_ctx))
 		goto auth_err;
 	if (crlen != round_up_to_quad(gc->gc_ctx.len) + 5 * 4)
@@ -1016,9 +1016,9 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
 	case RPC_GSS_PROC_CONTINUE_INIT:
 		if (argv->iov_len < 2 * 4)
 			goto auth_err;
-		if (ntohl(svc_getu32(argv)) != RPC_AUTH_NULL)
+		if (svc_getnl(argv) != RPC_AUTH_NULL)
 			goto auth_err;
-		if (ntohl(svc_getu32(argv)) != 0)
+		if (svc_getnl(argv) != 0)
 			goto auth_err;
 		break;
 	case RPC_GSS_PROC_DATA:
@@ -1076,14 +1076,14 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
 				goto drop;
 			if (resv->iov_len + 4 > PAGE_SIZE)
 				goto drop;
-			svc_putu32(resv, rpc_success);
+			svc_putnl(resv, RPC_SUCCESS);
 			if (svc_safe_putnetobj(resv, &rsip->out_handle))
 				goto drop;
 			if (resv->iov_len + 3 * 4 > PAGE_SIZE)
 				goto drop;
-			svc_putu32(resv, htonl(rsip->major_status));
-			svc_putu32(resv, htonl(rsip->minor_status));
-			svc_putu32(resv, htonl(GSS_SEQ_WIN));
+			svc_putnl(resv, rsip->major_status);
+			svc_putnl(resv, rsip->minor_status);
+			svc_putnl(resv, GSS_SEQ_WIN);
 			if (svc_safe_putnetobj(resv, &rsip->out_token))
 				goto drop;
 			rqstp->rq_client = NULL;
@@ -1093,7 +1093,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
 		set_bit(CACHE_NEGATIVE, &rsci->h.flags);
 		if (resv->iov_len + 4 > PAGE_SIZE)
 			goto drop;
-		svc_putu32(resv, rpc_success);
+		svc_putnl(resv, RPC_SUCCESS);
 		goto complete;
 	case RPC_GSS_PROC_DATA:
 		*authp = rpcsec_gsserr_ctxproblem;
@@ -1111,8 +1111,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
 				goto auth_err;
 			/* placeholders for length and seq. number: */
 			svcdata->body_start = resv->iov_base + resv->iov_len;
-			svc_putu32(resv, 0);
-			svc_putu32(resv, 0);
+			svc_putnl(resv, 0);
+			svc_putnl(resv, 0);
 			break;
 		case RPC_GSS_SVC_PRIVACY:
 			if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
@@ -1120,8 +1120,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
 				goto auth_err;
 			/* placeholders for length and seq. number: */
 			svcdata->body_start = resv->iov_base + resv->iov_len;
-			svc_putu32(resv, 0);
-			svc_putu32(resv, 0);
+			svc_putnl(resv, 0);
+			svc_putnl(resv, 0);
 			break;
 		default:
 			goto auth_err;
@@ -1199,7 +1199,7 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
 	mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
 	if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic))
 		goto out_err;
-	svc_putu32(resv, htonl(mic.len));
+	svc_putnl(resv, mic.len);
 	memset(mic.data + mic.len, 0,
 			round_up_to_quad(mic.len) - mic.len);
 	resv->iov_len += XDR_QUADLEN(mic.len) << 2;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index b76a227dd3ad..6589e1ad47fa 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -284,16 +284,16 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
 	rqstp->rq_sendfile_ok = 1;
 	/* tcp needs a space for the record length... */
 	if (rqstp->rq_prot == IPPROTO_TCP)
-		svc_putu32(resv, 0);
+		svc_putnl(resv, 0);
 
 	rqstp->rq_xid = svc_getu32(argv);
 	svc_putu32(resv, rqstp->rq_xid);
 
-	dir  = ntohl(svc_getu32(argv));
-	vers = ntohl(svc_getu32(argv));
+	dir  = svc_getnl(argv);
+	vers = svc_getnl(argv);
 
 	/* First words of reply: */
-	svc_putu32(resv, xdr_one);		/* REPLY */
+	svc_putnl(resv, 1);		/* REPLY */
 
 	if (dir != 0)		/* direction != CALL */
 		goto err_bad_dir;
@@ -303,11 +303,11 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
 	/* Save position in case we later decide to reject: */
 	accept_statp = resv->iov_base + resv->iov_len;
 
-	svc_putu32(resv, xdr_zero);		/* ACCEPT */
+	svc_putnl(resv, 0);		/* ACCEPT */
 
-	rqstp->rq_prog = prog = ntohl(svc_getu32(argv));	/* program number */
-	rqstp->rq_vers = vers = ntohl(svc_getu32(argv));	/* version number */
-	rqstp->rq_proc = proc = ntohl(svc_getu32(argv));	/* procedure number */
+	rqstp->rq_prog = prog = svc_getnl(argv);	/* program number */
+	rqstp->rq_vers = vers = svc_getnl(argv);	/* version number */
+	rqstp->rq_proc = proc = svc_getnl(argv);	/* procedure number */
 
 	progp = serv->sv_program;
 
@@ -361,7 +361,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
 
 	/* Build the reply header. */
 	statp = resv->iov_base +resv->iov_len;
-	svc_putu32(resv, rpc_success);		/* RPC_SUCCESS */
+	svc_putnl(resv, RPC_SUCCESS);
 
 	/* Bump per-procedure stats counter */
 	procp->pc_count++;
@@ -439,10 +439,10 @@ err_bad_dir:
 
 err_bad_rpc:
 	serv->sv_stats->rpcbadfmt++;
-	svc_putu32(resv, xdr_one);	/* REJECT */
-	svc_putu32(resv, xdr_zero);	/* RPC_MISMATCH */
-	svc_putu32(resv, xdr_two);	/* Only RPCv2 supported */
-	svc_putu32(resv, xdr_two);
+	svc_putnl(resv, 1);	/* REJECT */
+	svc_putnl(resv, 0);	/* RPC_MISMATCH */
+	svc_putnl(resv, 2);	/* Only RPCv2 supported */
+	svc_putnl(resv, 2);
 	goto sendit;
 
 err_bad_auth:
@@ -450,15 +450,15 @@ err_bad_auth:
 	serv->sv_stats->rpcbadauth++;
 	/* Restore write pointer to location of accept status: */
 	xdr_ressize_check(rqstp, accept_statp);
-	svc_putu32(resv, xdr_one);	/* REJECT */
-	svc_putu32(resv, xdr_one);	/* AUTH_ERROR */
-	svc_putu32(resv, auth_stat);	/* status */
+	svc_putnl(resv, 1);	/* REJECT */
+	svc_putnl(resv, 1);	/* AUTH_ERROR */
+	svc_putnl(resv, ntohl(auth_stat));	/* status */
 	goto sendit;
 
 err_bad_prog:
 	dprintk("svc: unknown program %d\n", prog);
 	serv->sv_stats->rpcbadfmt++;
-	svc_putu32(resv, rpc_prog_unavail);
+	svc_putnl(resv, RPC_PROG_UNAVAIL);
 	goto sendit;
 
 err_bad_vers:
@@ -466,9 +466,9 @@ err_bad_vers:
 	printk("svc: unknown version (%d)\n", vers);
 #endif
 	serv->sv_stats->rpcbadfmt++;
-	svc_putu32(resv, rpc_prog_mismatch);
-	svc_putu32(resv, htonl(progp->pg_lovers));
-	svc_putu32(resv, htonl(progp->pg_hivers));
+	svc_putnl(resv, RPC_PROG_MISMATCH);
+	svc_putnl(resv, progp->pg_lovers);
+	svc_putnl(resv, progp->pg_hivers);
 	goto sendit;
 
 err_bad_proc:
@@ -476,7 +476,7 @@ err_bad_proc:
 	printk("svc: unknown procedure (%d)\n", proc);
 #endif
 	serv->sv_stats->rpcbadfmt++;
-	svc_putu32(resv, rpc_proc_unavail);
+	svc_putnl(resv, RPC_PROC_UNAVAIL);
 	goto sendit;
 
 err_garbage:
@@ -486,6 +486,6 @@ err_garbage:
 	rpc_stat = rpc_garbage_args;
 err_bad:
 	serv->sv_stats->rpcbadfmt++;
-	svc_putu32(resv, rpc_stat);
+	svc_putnl(resv, ntohl(rpc_stat));
 	goto sendit;
 }
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 5b28c6176806..3f4d1f622de8 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -42,7 +42,7 @@ svc_authenticate(struct svc_rqst *rqstp, u32 *authp)
 
 	*authp = rpc_auth_ok;
 
-	flavor = ntohl(svc_getu32(&rqstp->rq_arg.head[0]));
+	flavor = svc_getnl(&rqstp->rq_arg.head[0]);
 
 	dprintk("svc: svc_authenticate (%d)\n", flavor);
 
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 7e5707e2d6b6..27f443b44af8 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -427,7 +427,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp)
 		*authp = rpc_autherr_badcred;
 		return SVC_DENIED;
 	}
-	if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
+	if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
 		dprintk("svc: bad null verf\n");
 		*authp = rpc_autherr_badverf;
 		return SVC_DENIED;
@@ -441,8 +441,8 @@ svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp)
 		return SVC_DROP; /* kmalloc failure - client must retry */
 
 	/* Put NULL verifier */
-	svc_putu32(resv, RPC_AUTH_NULL);
-	svc_putu32(resv, 0);
+	svc_putnl(resv, RPC_AUTH_NULL);
+	svc_putnl(resv, 0);
 
 	return SVC_OK;
 }
@@ -488,31 +488,31 @@ svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp)
 
 	svc_getu32(argv);			/* length */
 	svc_getu32(argv);			/* time stamp */
-	slen = XDR_QUADLEN(ntohl(svc_getu32(argv)));	/* machname length */
+	slen = XDR_QUADLEN(svc_getnl(argv));	/* machname length */
 	if (slen > 64 || (len -= (slen + 3)*4) < 0)
 		goto badcred;
 	argv->iov_base = (void*)((u32*)argv->iov_base + slen);	/* skip machname */
 	argv->iov_len -= slen*4;
 
-	cred->cr_uid = ntohl(svc_getu32(argv));		/* uid */
-	cred->cr_gid = ntohl(svc_getu32(argv));		/* gid */
-	slen = ntohl(svc_getu32(argv));			/* gids length */
+	cred->cr_uid = svc_getnl(argv);		/* uid */
+	cred->cr_gid = svc_getnl(argv);		/* gid */
+	slen = svc_getnl(argv);			/* gids length */
 	if (slen > 16 || (len -= (slen + 2)*4) < 0)
 		goto badcred;
 	cred->cr_group_info = groups_alloc(slen);
 	if (cred->cr_group_info == NULL)
 		return SVC_DROP;
 	for (i = 0; i < slen; i++)
-		GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv));
+		GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
 
-	if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
+	if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
 		*authp = rpc_autherr_badverf;
 		return SVC_DENIED;
 	}
 
 	/* Put NULL verifier */
-	svc_putu32(resv, RPC_AUTH_NULL);
-	svc_putu32(resv, 0);
+	svc_putnl(resv, RPC_AUTH_NULL);
+	svc_putnl(resv, 0);
 
 	return SVC_OK;
 
-- 
cgit v1.2.3


From d8ed029d6000ba2e2908d9286409e4833c091b4c Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 26 Sep 2006 22:29:38 -0700
Subject: [SUNRPC]: trivial endianness annotations

pure s/u32/__be32/

[AV: large part based on Alexey's patches]

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sunrpc/auth.h         | 16 +++++------
 include/linux/sunrpc/msg_prot.h     |  2 +-
 include/linux/sunrpc/svc.h          | 14 +++++-----
 include/linux/sunrpc/svcauth.h      |  4 +--
 include/linux/sunrpc/xdr.h          | 38 +++++++++++++-------------
 include/linux/sunrpc/xprt.h         | 12 ++++-----
 net/sunrpc/auth.c                   | 12 ++++-----
 net/sunrpc/auth_gss/auth_gss.c      | 33 ++++++++++++-----------
 net/sunrpc/auth_gss/gss_krb5_seal.c |  2 +-
 net/sunrpc/auth_gss/gss_krb5_wrap.c |  4 +--
 net/sunrpc/auth_gss/svcauth_gss.c   | 24 ++++++++---------
 net/sunrpc/auth_null.c              |  8 +++---
 net/sunrpc/auth_unix.c              | 10 +++----
 net/sunrpc/clnt.c                   | 23 ++++++++--------
 net/sunrpc/pmap_clnt.c              |  6 ++---
 net/sunrpc/svc.c                    |  8 +++---
 net/sunrpc/svcauth.c                |  2 +-
 net/sunrpc/svcauth_unix.c           |  8 +++---
 net/sunrpc/svcsock.c                |  2 +-
 net/sunrpc/xdr.c                    | 54 ++++++++++++++++++-------------------
 net/sunrpc/xprt.c                   |  4 +--
 net/sunrpc/xprtsock.c               |  3 ++-
 22 files changed, 146 insertions(+), 143 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index a6de332e57d4..862c0d8c8381 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -109,13 +109,13 @@ struct rpc_credops {
 	void			(*crdestroy)(struct rpc_cred *);
 
 	int			(*crmatch)(struct auth_cred *, struct rpc_cred *, int);
-	u32 *			(*crmarshal)(struct rpc_task *, u32 *);
+	__be32 *		(*crmarshal)(struct rpc_task *, __be32 *);
 	int			(*crrefresh)(struct rpc_task *);
-	u32 *			(*crvalidate)(struct rpc_task *, u32 *);
+	__be32 *		(*crvalidate)(struct rpc_task *, __be32 *);
 	int			(*crwrap_req)(struct rpc_task *, kxdrproc_t,
-						void *, u32 *, void *);
+						void *, __be32 *, void *);
 	int			(*crunwrap_resp)(struct rpc_task *, kxdrproc_t,
-						void *, u32 *, void *);
+						void *, __be32 *, void *);
 };
 
 extern struct rpc_authops	authunix_ops;
@@ -134,10 +134,10 @@ struct rpc_cred *	rpcauth_bindcred(struct rpc_task *);
 void			rpcauth_holdcred(struct rpc_task *);
 void			put_rpccred(struct rpc_cred *);
 void			rpcauth_unbindcred(struct rpc_task *);
-u32 *			rpcauth_marshcred(struct rpc_task *, u32 *);
-u32 *			rpcauth_checkverf(struct rpc_task *, u32 *);
-int			rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, u32 *data, void *obj);
-int			rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, u32 *data, void *obj);
+__be32 *		rpcauth_marshcred(struct rpc_task *, __be32 *);
+__be32 *		rpcauth_checkverf(struct rpc_task *, __be32 *);
+int			rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, __be32 *data, void *obj);
+int			rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, __be32 *data, void *obj);
 int			rpcauth_refreshcred(struct rpc_task *);
 void			rpcauth_invalcred(struct rpc_task *);
 int			rpcauth_uptodatecred(struct rpc_task *);
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h
index f43f237360ae..d9f5934ac9fe 100644
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -95,7 +95,7 @@ enum rpc_auth_stat {
  * 2GB.
  */
 
-typedef u32	rpc_fraghdr;
+typedef __be32	rpc_fraghdr;
 
 #define	RPC_LAST_STREAM_FRAGMENT	(1U << 31)
 #define	RPC_FRAGMENT_SIZE_MASK		(~RPC_LAST_STREAM_FRAGMENT)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 5df1d319f5d5..73140ee5c638 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -147,7 +147,7 @@ struct svc_rqst {
 	short			rq_arghi;	/* pages available in argument page list */
 	short			rq_resused;	/* pages used for result */
 
-	u32			rq_xid;		/* transmission id */
+	__be32			rq_xid;		/* transmission id */
 	u32			rq_prog;	/* program number */
 	u32			rq_vers;	/* program version */
 	u32			rq_proc;	/* procedure number */
@@ -156,7 +156,7 @@ struct svc_rqst {
 				rq_secure  : 1;	/* secure port */
 
 
-	__u32			rq_daddr;	/* dest addr of request - reply from here */
+	__be32			rq_daddr;	/* dest addr of request - reply from here */
 
 	void *			rq_argp;	/* decoded arguments */
 	void *			rq_resp;	/* xdr'd results */
@@ -186,7 +186,7 @@ struct svc_rqst {
  * Check buffer bounds after decoding arguments
  */
 static inline int
-xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
+xdr_argsize_check(struct svc_rqst *rqstp, __be32 *p)
 {
 	char *cp = (char *)p;
 	struct kvec *vec = &rqstp->rq_arg.head[0];
@@ -195,7 +195,7 @@ xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
 }
 
 static inline int
-xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
+xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct kvec *vec = &rqstp->rq_res.head[0];
 	char *cp = (char*)p;
@@ -266,10 +266,10 @@ struct svc_deferred_req {
 	u32			prot;	/* protocol (UDP or TCP) */
 	struct sockaddr_in	addr;
 	struct svc_sock		*svsk;	/* where reply must go */
-	u32			daddr;	/* where reply must come from */
+	__be32			daddr;	/* where reply must come from */
 	struct cache_deferred_req handle;
 	int			argslen;
-	u32			args[0];
+	__be32			args[0];
 };
 
 /*
@@ -301,7 +301,7 @@ struct svc_version {
 	 * A return value of 0 means drop the request. 
 	 * vs_dispatch == NULL means use default dispatcher.
 	 */
-	int			(*vs_dispatch)(struct svc_rqst *, u32 *);
+	int			(*vs_dispatch)(struct svc_rqst *, __be32 *);
 };
 
 /*
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 2fe2087edd66..a6601650deeb 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -95,7 +95,7 @@ struct auth_ops {
 	char *	name;
 	struct module *owner;
 	int	flavour;
-	int	(*accept)(struct svc_rqst *rq, u32 *authp);
+	int	(*accept)(struct svc_rqst *rq, __be32 *authp);
 	int	(*release)(struct svc_rqst *rq);
 	void	(*domain_release)(struct auth_domain *);
 	int	(*set_client)(struct svc_rqst *rq);
@@ -112,7 +112,7 @@ struct auth_ops {
 #define	SVC_COMPLETE	9
 
 
-extern int	svc_authenticate(struct svc_rqst *rqstp, u32 *authp);
+extern int	svc_authenticate(struct svc_rqst *rqstp, __be32 *authp);
 extern int	svc_authorise(struct svc_rqst *rqstp);
 extern int	svc_set_client(struct svc_rqst *rqstp);
 extern int	svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index e6d3d349506c..953723b09bc6 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -32,7 +32,7 @@ struct xdr_netobj {
  * side) or svc_rqst pointer (server side).
  * Encode functions always assume there's enough room in the buffer.
  */
-typedef int	(*kxdrproc_t)(void *rqstp, u32 *data, void *obj);
+typedef int	(*kxdrproc_t)(void *rqstp, __be32 *data, void *obj);
 
 /*
  * Basic structure for transmission/reception of a client XDR message.
@@ -88,19 +88,19 @@ struct xdr_buf {
 /*
  * Miscellaneous XDR helper functions
  */
-u32 *	xdr_encode_opaque_fixed(u32 *p, const void *ptr, unsigned int len);
-u32 *	xdr_encode_opaque(u32 *p, const void *ptr, unsigned int len);
-u32 *	xdr_encode_string(u32 *p, const char *s);
-u32 *	xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen);
-u32 *	xdr_encode_netobj(u32 *p, const struct xdr_netobj *);
-u32 *	xdr_decode_netobj(u32 *p, struct xdr_netobj *);
+__be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len);
+__be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len);
+__be32 *xdr_encode_string(__be32 *p, const char *s);
+__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen);
+__be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *);
+__be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *);
 
 void	xdr_encode_pages(struct xdr_buf *, struct page **, unsigned int,
 			 unsigned int);
 void	xdr_inline_pages(struct xdr_buf *, unsigned int,
 			 struct page **, unsigned int, unsigned int);
 
-static inline u32 *xdr_encode_array(u32 *p, const void *s, unsigned int len)
+static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len)
 {
 	return xdr_encode_opaque(p, s, len);
 }
@@ -108,16 +108,16 @@ static inline u32 *xdr_encode_array(u32 *p, const void *s, unsigned int len)
 /*
  * Decode 64bit quantities (NFSv3 support)
  */
-static inline u32 *
-xdr_encode_hyper(u32 *p, __u64 val)
+static inline __be32 *
+xdr_encode_hyper(__be32 *p, __u64 val)
 {
 	*p++ = htonl(val >> 32);
 	*p++ = htonl(val & 0xFFFFFFFF);
 	return p;
 }
 
-static inline u32 *
-xdr_decode_hyper(u32 *p, __u64 *valp)
+static inline __be32 *
+xdr_decode_hyper(__be32 *p, __u64 *valp)
 {
 	*valp  = ((__u64) ntohl(*p++)) << 32;
 	*valp |= ntohl(*p++);
@@ -128,7 +128,7 @@ xdr_decode_hyper(u32 *p, __u64 *valp)
  * Adjust kvec to reflect end of xdr'ed data (RPC client XDR)
  */
 static inline int
-xdr_adjust_iovec(struct kvec *iov, u32 *p)
+xdr_adjust_iovec(struct kvec *iov, __be32 *p)
 {
 	return iov->iov_len = ((u8 *) p - (u8 *) iov->iov_base);
 }
@@ -180,19 +180,19 @@ extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
  * Provide some simple tools for XDR buffer overflow-checking etc.
  */
 struct xdr_stream {
-	uint32_t *p;		/* start of available buffer */
+	__be32 *p;		/* start of available buffer */
 	struct xdr_buf *buf;	/* XDR buffer to read/write */
 
-	uint32_t *end;		/* end of available buffer space */
+	__be32 *end;		/* end of available buffer space */
 	struct kvec *iov;	/* pointer to the current kvec */
 };
 
-extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p);
-extern uint32_t *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
 		unsigned int base, unsigned int len);
-extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p);
-extern uint32_t *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
 extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
 
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index bdeba8538c71..6cf626580752 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -79,7 +79,7 @@ struct rpc_rqst {
 	 * This is the private part
 	 */
 	struct rpc_task *	rq_task;	/* RPC task data */
-	__u32			rq_xid;		/* request XID */
+	__be32			rq_xid;		/* request XID */
 	int			rq_cong;	/* has incremented xprt->cong */
 	int			rq_received;	/* receive completed */
 	u32			rq_seqno;	/* gss seq no. used on req. */
@@ -171,9 +171,9 @@ struct rpc_xprt {
 	/*
 	 * State of TCP reply receive stuff
 	 */
-	u32			tcp_recm,	/* Fragment header */
-				tcp_xid,	/* Current XID */
-				tcp_reclen,	/* fragment length */
+	__be32			tcp_recm,	/* Fragment header */
+				tcp_xid;	/* Current XID */
+	u32			tcp_reclen,	/* fragment length */
 				tcp_offset;	/* fragment offset */
 	unsigned long		tcp_copied,	/* copied to request */
 				tcp_flags;
@@ -253,7 +253,7 @@ void			xprt_release(struct rpc_task *task);
 struct rpc_xprt *	xprt_get(struct rpc_xprt *xprt);
 void			xprt_put(struct rpc_xprt *xprt);
 
-static inline u32 *xprt_skip_transport_header(struct rpc_xprt *xprt, u32 *p)
+static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)
 {
 	return p + xprt->tsh_size;
 }
@@ -268,7 +268,7 @@ void			xprt_wait_for_buffer_space(struct rpc_task *task);
 void			xprt_write_space(struct rpc_xprt *xprt);
 void			xprt_update_rtt(struct rpc_task *task);
 void			xprt_adjust_cwnd(struct rpc_task *task, int result);
-struct rpc_rqst *	xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid);
+struct rpc_rqst *	xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
 void			xprt_complete_rqst(struct rpc_task *task, int copied);
 void			xprt_release_rqst_cong(struct rpc_task *task);
 void			xprt_disconnect(struct rpc_xprt *xprt);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 55163af3dcaf..993ff1a5d945 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -331,8 +331,8 @@ rpcauth_unbindcred(struct rpc_task *task)
 	task->tk_msg.rpc_cred = NULL;
 }
 
-u32 *
-rpcauth_marshcred(struct rpc_task *task, u32 *p)
+__be32 *
+rpcauth_marshcred(struct rpc_task *task, __be32 *p)
 {
 	struct rpc_cred	*cred = task->tk_msg.rpc_cred;
 
@@ -342,8 +342,8 @@ rpcauth_marshcred(struct rpc_task *task, u32 *p)
 	return cred->cr_ops->crmarshal(task, p);
 }
 
-u32 *
-rpcauth_checkverf(struct rpc_task *task, u32 *p)
+__be32 *
+rpcauth_checkverf(struct rpc_task *task, __be32 *p)
 {
 	struct rpc_cred	*cred = task->tk_msg.rpc_cred;
 
@@ -355,7 +355,7 @@ rpcauth_checkverf(struct rpc_task *task, u32 *p)
 
 int
 rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
-		u32 *data, void *obj)
+		__be32 *data, void *obj)
 {
 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
 
@@ -369,7 +369,7 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
 
 int
 rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
-		u32 *data, void *obj)
+		__be32 *data, void *obj)
 {
 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
 
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 6eed3e166ba3..a6ed2d22a6e6 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -826,14 +826,14 @@ out:
 * Marshal credentials.
 * Maybe we should keep a cached credential for performance reasons.
 */
-static u32 *
-gss_marshal(struct rpc_task *task, u32 *p)
+static __be32 *
+gss_marshal(struct rpc_task *task, __be32 *p)
 {
 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
 	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
 						 gc_base);
 	struct gss_cl_ctx	*ctx = gss_cred_get_ctx(cred);
-	u32		*cred_len;
+	__be32		*cred_len;
 	struct rpc_rqst *req = task->tk_rqstp;
 	u32             maj_stat = 0;
 	struct xdr_netobj mic;
@@ -894,12 +894,12 @@ gss_refresh(struct rpc_task *task)
 	return 0;
 }
 
-static u32 *
-gss_validate(struct rpc_task *task, u32 *p)
+static __be32 *
+gss_validate(struct rpc_task *task, __be32 *p)
 {
 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
 	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-	u32		seq;
+	__be32		seq;
 	struct kvec	iov;
 	struct xdr_buf	verf_buf;
 	struct xdr_netobj mic;
@@ -940,13 +940,14 @@ out_bad:
 
 static inline int
 gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
+		kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj)
 {
 	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
 	struct xdr_buf	integ_buf;
-	u32             *integ_len = NULL;
+	__be32          *integ_len = NULL;
 	struct xdr_netobj mic;
-	u32		offset, *q;
+	u32		offset;
+	__be32		*q;
 	struct kvec	*iov;
 	u32             maj_stat = 0;
 	int		status = -EIO;
@@ -1032,13 +1033,13 @@ out:
 
 static inline int
 gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
+		kxdrproc_t encode, struct rpc_rqst *rqstp, __be32 *p, void *obj)
 {
 	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
 	u32		offset;
 	u32             maj_stat;
 	int		status;
-	u32		*opaque_len;
+	__be32		*opaque_len;
 	struct page	**inpages;
 	int		first;
 	int		pad;
@@ -1095,7 +1096,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 
 static int
 gss_wrap_req(struct rpc_task *task,
-	     kxdrproc_t encode, void *rqstp, u32 *p, void *obj)
+	     kxdrproc_t encode, void *rqstp, __be32 *p, void *obj)
 {
 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
 	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
@@ -1132,7 +1133,7 @@ out:
 
 static inline int
 gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		struct rpc_rqst *rqstp, u32 **p)
+		struct rpc_rqst *rqstp, __be32 **p)
 {
 	struct xdr_buf	*rcv_buf = &rqstp->rq_rcv_buf;
 	struct xdr_buf integ_buf;
@@ -1169,7 +1170,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 
 static inline int
 gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		struct rpc_rqst *rqstp, u32 **p)
+		struct rpc_rqst *rqstp, __be32 **p)
 {
 	struct xdr_buf  *rcv_buf = &rqstp->rq_rcv_buf;
 	u32 offset;
@@ -1198,13 +1199,13 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 
 static int
 gss_unwrap_resp(struct rpc_task *task,
-		kxdrproc_t decode, void *rqstp, u32 *p, void *obj)
+		kxdrproc_t decode, void *rqstp, __be32 *p, void *obj)
 {
 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
 	struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
 			gc_base);
 	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-	u32		*savedp = p;
+	__be32		*savedp = p;
 	struct kvec	*head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head;
 	int		savedlen = head->iov_len;
 	int             status = -EIO;
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 2f312164d6d5..08601ee4cd73 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -115,7 +115,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
 	krb5_hdr = ptr - 2;
 	msg_start = krb5_hdr + 24;
 
-	*(u16 *)(krb5_hdr + 2) = htons(ctx->signalg);
+	*(__be16 *)(krb5_hdr + 2) = htons(ctx->signalg);
 	memset(krb5_hdr + 4, 0xff, 4);
 
 	if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum))
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index f179415d0c38..cc45c1605f80 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -177,9 +177,9 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
 	msg_start = krb5_hdr + 24;
 	/* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize);
 
-	*(u16 *)(krb5_hdr + 2) = htons(kctx->signalg);
+	*(__be16 *)(krb5_hdr + 2) = htons(kctx->signalg);
 	memset(krb5_hdr + 4, 0xff, 4);
-	*(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg);
+	*(__be16 *)(krb5_hdr + 4) = htons(kctx->sealalg);
 
 	make_confounder(msg_start, blocksize);
 
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index cd3d77ca3a31..a2587e8eef7b 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -640,7 +640,7 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o)
  */
 static int
 gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
-		  u32 *rpcstart, struct rpc_gss_wire_cred *gc, u32 *authp)
+		  __be32 *rpcstart, struct rpc_gss_wire_cred *gc, __be32 *authp)
 {
 	struct gss_ctx		*ctx_id = rsci->mechctx;
 	struct xdr_buf		rpchdr;
@@ -687,7 +687,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
 static int
 gss_write_null_verf(struct svc_rqst *rqstp)
 {
-	u32     *p;
+	__be32     *p;
 
 	svc_putnl(rqstp->rq_res.head, RPC_AUTH_NULL);
 	p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len;
@@ -701,11 +701,11 @@ gss_write_null_verf(struct svc_rqst *rqstp)
 static int
 gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
 {
-	u32			xdr_seq;
+	__be32			xdr_seq;
 	u32			maj_stat;
 	struct xdr_buf		verf_data;
 	struct xdr_netobj	mic;
-	u32			*p;
+	__be32			*p;
 	struct kvec		iov;
 
 	svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS);
@@ -782,7 +782,7 @@ EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor);
 static inline int
 read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
 {
-	u32     raw;
+	__be32  raw;
 	int     status;
 
 	status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj));
@@ -905,7 +905,7 @@ struct gss_svc_data {
 	struct rpc_gss_wire_cred	clcred;
 	/* pointer to the beginning of the procedure-specific results,
 	 * which may be encrypted/checksummed in svcauth_gss_release: */
-	u32				*body_start;
+	__be32				*body_start;
 	struct rsc			*rsci;
 };
 
@@ -946,7 +946,7 @@ gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip)
  * response here and return SVC_COMPLETE.
  */
 static int
-svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
+svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 {
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
@@ -956,8 +956,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
 	struct rpc_gss_wire_cred *gc;
 	struct rsc	*rsci = NULL;
 	struct rsi	*rsip, rsikey;
-	u32		*rpcstart;
-	u32		*reject_stat = resv->iov_base + resv->iov_len;
+	__be32		*rpcstart;
+	__be32		*reject_stat = resv->iov_base + resv->iov_len;
 	int		ret;
 
 	dprintk("RPC:      svcauth_gss: argv->iov_len = %zd\n",argv->iov_len);
@@ -1156,7 +1156,7 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
 	struct xdr_buf integ_buf;
 	struct xdr_netobj mic;
 	struct kvec *resv;
-	u32 *p;
+	__be32 *p;
 	int integ_offset, integ_len;
 	int stat = -EINVAL;
 
@@ -1219,7 +1219,7 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
 	struct rpc_gss_wire_cred *gc = &gsd->clcred;
 	struct xdr_buf *resbuf = &rqstp->rq_res;
 	struct page **inpages = NULL;
-	u32 *p;
+	__be32 *p;
 	int offset, *len;
 	int pad;
 
@@ -1264,7 +1264,7 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
 		return -ENOMEM;
 	*len = htonl(resbuf->len - offset);
 	pad = 3 - ((resbuf->len - offset - 1)&3);
-	p = (u32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len);
+	p = (__be32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len);
 	memset(p, 0, pad);
 	resbuf->tail[0].iov_len += pad;
 	resbuf->len += pad;
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 2eccffa96ba1..3be257dc32b2 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -60,8 +60,8 @@ nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags)
 /*
  * Marshal credential.
  */
-static u32 *
-nul_marshal(struct rpc_task *task, u32 *p)
+static __be32 *
+nul_marshal(struct rpc_task *task, __be32 *p)
 {
 	*p++ = htonl(RPC_AUTH_NULL);
 	*p++ = 0;
@@ -81,8 +81,8 @@ nul_refresh(struct rpc_task *task)
 	return 0;
 }
 
-static u32 *
-nul_validate(struct rpc_task *task, u32 *p)
+static __be32 *
+nul_validate(struct rpc_task *task, __be32 *p)
 {
 	rpc_authflavor_t	flavor;
 	u32			size;
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 74c7406a1054..f7f990c9afe2 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -137,12 +137,12 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
  * Marshal credentials.
  * Maybe we should keep a cached credential for performance reasons.
  */
-static u32 *
-unx_marshal(struct rpc_task *task, u32 *p)
+static __be32 *
+unx_marshal(struct rpc_task *task, __be32 *p)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
 	struct unx_cred	*cred = (struct unx_cred *) task->tk_msg.rpc_cred;
-	u32		*base, *hold;
+	__be32		*base, *hold;
 	int		i;
 
 	*p++ = htonl(RPC_AUTH_UNIX);
@@ -178,8 +178,8 @@ unx_refresh(struct rpc_task *task)
 	return 0;
 }
 
-static u32 *
-unx_validate(struct rpc_task *task, u32 *p)
+static __be32 *
+unx_validate(struct rpc_task *task, __be32 *p)
 {
 	rpc_authflavor_t	flavor;
 	u32			size;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 084a0ad5c64e..124ff0ceb55b 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -60,8 +60,8 @@ static void	call_refreshresult(struct rpc_task *task);
 static void	call_timeout(struct rpc_task *task);
 static void	call_connect(struct rpc_task *task);
 static void	call_connect_status(struct rpc_task *task);
-static u32 *	call_header(struct rpc_task *task);
-static u32 *	call_verify(struct rpc_task *task);
+static __be32 *	call_header(struct rpc_task *task);
+static __be32 *	call_verify(struct rpc_task *task);
 
 
 static int
@@ -782,7 +782,7 @@ call_encode(struct rpc_task *task)
 	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
 	unsigned int	bufsiz;
 	kxdrproc_t	encode;
-	u32		*p;
+	__be32		*p;
 
 	dprintk("RPC: %4d call_encode (status %d)\n", 
 				task->tk_pid, task->tk_status);
@@ -1100,7 +1100,7 @@ call_decode(struct rpc_task *task)
 	struct rpc_clnt	*clnt = task->tk_client;
 	struct rpc_rqst	*req = task->tk_rqstp;
 	kxdrproc_t	decode = task->tk_msg.rpc_proc->p_decode;
-	u32		*p;
+	__be32		*p;
 
 	dprintk("RPC: %4d call_decode (status %d)\n", 
 				task->tk_pid, task->tk_status);
@@ -1197,12 +1197,12 @@ call_refreshresult(struct rpc_task *task)
 /*
  * Call header serialization
  */
-static u32 *
+static __be32 *
 call_header(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt = task->tk_client;
 	struct rpc_rqst	*req = task->tk_rqstp;
-	u32		*p = req->rq_svec[0].iov_base;
+	__be32		*p = req->rq_svec[0].iov_base;
 
 	/* FIXME: check buffer size? */
 
@@ -1221,12 +1221,13 @@ call_header(struct rpc_task *task)
 /*
  * Reply header verification
  */
-static u32 *
+static __be32 *
 call_verify(struct rpc_task *task)
 {
 	struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
 	int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
-	u32	*p = iov->iov_base, n;
+	__be32	*p = iov->iov_base;
+	u32 n;
 	int error = -EACCES;
 
 	if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) {
@@ -1303,7 +1304,7 @@ call_verify(struct rpc_task *task)
 		printk(KERN_WARNING "call_verify: auth check failed\n");
 		goto out_garbage;		/* bad verifier, retry */
 	}
-	len = p - (u32 *)iov->iov_base - 1;
+	len = p - (__be32 *)iov->iov_base - 1;
 	if (len < 0)
 		goto out_overflow;
 	switch ((n = ntohl(*p++))) {
@@ -1358,12 +1359,12 @@ out_overflow:
 	goto out_garbage;
 }
 
-static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj)
+static int rpcproc_encode_null(void *rqstp, __be32 *data, void *obj)
 {
 	return 0;
 }
 
-static int rpcproc_decode_null(void *rqstp, u32 *data, void *obj)
+static int rpcproc_decode_null(void *rqstp, __be32 *data, void *obj)
 {
 	return 0;
 }
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index c04609d3476a..919d5ba7ca0a 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -300,7 +300,7 @@ static struct rpc_clnt *pmap_create(char *hostname, struct sockaddr_in *srvaddr,
 /*
  * XDR encode/decode functions for PMAP
  */
-static int xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args *map)
+static int xdr_encode_mapping(struct rpc_rqst *req, __be32 *p, struct portmap_args *map)
 {
 	dprintk("RPC: xdr_encode_mapping(%u, %u, %u, %u)\n",
 		map->pm_prog, map->pm_vers, map->pm_prot, map->pm_port);
@@ -313,13 +313,13 @@ static int xdr_encode_mapping(struct rpc_rqst *req, u32 *p, struct portmap_args
 	return 0;
 }
 
-static int xdr_decode_port(struct rpc_rqst *req, u32 *p, unsigned short *portp)
+static int xdr_decode_port(struct rpc_rqst *req, __be32 *p, unsigned short *portp)
 {
 	*portp = (unsigned short) ntohl(*p++);
 	return 0;
 }
 
-static int xdr_decode_bool(struct rpc_rqst *req, u32 *p, unsigned int *boolp)
+static int xdr_decode_bool(struct rpc_rqst *req, __be32 *p, unsigned int *boolp)
 {
 	*boolp = (unsigned int) ntohl(*p++);
 	return 0;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6589e1ad47fa..44b8d9d4c18a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -256,11 +256,11 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
 	struct kvec *		argv = &rqstp->rq_arg.head[0];
 	struct kvec *		resv = &rqstp->rq_res.head[0];
 	kxdrproc_t		xdr;
-	u32			*statp;
-	u32			dir, prog, vers, proc,
-				auth_stat, rpc_stat;
+	__be32			*statp;
+	u32			dir, prog, vers, proc;
+	__be32			auth_stat, rpc_stat;
 	int			auth_res;
-	u32			*accept_statp;
+	__be32			*accept_statp;
 
 	rpc_stat = rpc_success;
 
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 3f4d1f622de8..8f2320aded5c 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -35,7 +35,7 @@ static struct auth_ops	*authtab[RPC_AUTH_MAXFLAVOR] = {
 };
 
 int
-svc_authenticate(struct svc_rqst *rqstp, u32 *authp)
+svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
 {
 	rpc_authflavor_t	flavor;
 	struct auth_ops		*aops;
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 27f443b44af8..f98229fb5e10 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -145,7 +145,7 @@ static void ip_map_request(struct cache_detail *cd,
 {
 	char text_addr[20];
 	struct ip_map *im = container_of(h, struct ip_map, h);
-	__u32 addr = im->m_addr.s_addr;
+	__be32 addr = im->m_addr.s_addr;
 	
 	snprintf(text_addr, 20, "%u.%u.%u.%u",
 		 ntohl(addr) >> 24 & 0xff,
@@ -410,7 +410,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
 }
 
 static int
-svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp)
+svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
 {
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
@@ -472,7 +472,7 @@ struct auth_ops svcauth_null = {
 
 
 static int
-svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp)
+svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
 {
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
@@ -491,7 +491,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp)
 	slen = XDR_QUADLEN(svc_getnl(argv));	/* machname length */
 	if (slen > 64 || (len -= (slen + 3)*4) < 0)
 		goto badcred;
-	argv->iov_base = (void*)((u32*)argv->iov_base + slen);	/* skip machname */
+	argv->iov_base = (void*)((__be32*)argv->iov_base + slen);	/* skip machname */
 	argv->iov_len -= slen*4;
 
 	cred->cr_uid = svc_getnl(argv);		/* uid */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 953aff89bcac..f09f7487051f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1030,7 +1030,7 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
 {
 	struct xdr_buf	*xbufp = &rqstp->rq_res;
 	int sent;
-	u32 reclen;
+	__be32 reclen;
 
 	/* Set up the first element of the reply kvec.
 	 * Any other kvecs that may be in use have been taken
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 6ac45103a272..9022eb8b37ed 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -18,8 +18,8 @@
 /*
  * XDR functions for basic NFS types
  */
-u32 *
-xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj)
+__be32 *
+xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj)
 {
 	unsigned int	quadlen = XDR_QUADLEN(obj->len);
 
@@ -29,8 +29,8 @@ xdr_encode_netobj(u32 *p, const struct xdr_netobj *obj)
 	return p + XDR_QUADLEN(obj->len);
 }
 
-u32 *
-xdr_decode_netobj(u32 *p, struct xdr_netobj *obj)
+__be32 *
+xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj)
 {
 	unsigned int	len;
 
@@ -55,7 +55,7 @@ xdr_decode_netobj(u32 *p, struct xdr_netobj *obj)
  * Returns the updated current XDR buffer position
  *
  */
-u32 *xdr_encode_opaque_fixed(u32 *p, const void *ptr, unsigned int nbytes)
+__be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int nbytes)
 {
 	if (likely(nbytes != 0)) {
 		unsigned int quadlen = XDR_QUADLEN(nbytes);
@@ -79,21 +79,21 @@ EXPORT_SYMBOL(xdr_encode_opaque_fixed);
  *
  * Returns the updated current XDR buffer position
  */
-u32 *xdr_encode_opaque(u32 *p, const void *ptr, unsigned int nbytes)
+__be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
 {
 	*p++ = htonl(nbytes);
 	return xdr_encode_opaque_fixed(p, ptr, nbytes);
 }
 EXPORT_SYMBOL(xdr_encode_opaque);
 
-u32 *
-xdr_encode_string(u32 *p, const char *string)
+__be32 *
+xdr_encode_string(__be32 *p, const char *string)
 {
 	return xdr_encode_array(p, string, strlen(string));
 }
 
-u32 *
-xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen)
+__be32 *
+xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen)
 {
 	unsigned int	len;
 
@@ -432,7 +432,7 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len)
  *	 of the buffer length, and takes care of adjusting the kvec
  *	 length for us.
  */
-void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
+void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 {
 	struct kvec *iov = buf->head;
 	int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
@@ -440,8 +440,8 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
 	BUG_ON(scratch_len < 0);
 	xdr->buf = buf;
 	xdr->iov = iov;
-	xdr->p = (uint32_t *)((char *)iov->iov_base + iov->iov_len);
-	xdr->end = (uint32_t *)((char *)iov->iov_base + scratch_len);
+	xdr->p = (__be32 *)((char *)iov->iov_base + iov->iov_len);
+	xdr->end = (__be32 *)((char *)iov->iov_base + scratch_len);
 	BUG_ON(iov->iov_len > scratch_len);
 
 	if (p != xdr->p && p != NULL) {
@@ -465,10 +465,10 @@ EXPORT_SYMBOL(xdr_init_encode);
  * bytes of data. If so, update the total xdr_buf length, and
  * adjust the length of the current kvec.
  */
-uint32_t * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
+__be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
 {
-	uint32_t *p = xdr->p;
-	uint32_t *q;
+	__be32 *p = xdr->p;
+	__be32 *q;
 
 	/* align nbytes on the next 32-bit boundary */
 	nbytes += 3;
@@ -524,7 +524,7 @@ EXPORT_SYMBOL(xdr_write_pages);
  * @buf: pointer to XDR buffer from which to decode data
  * @p: current pointer inside XDR buffer
  */
-void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
+void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 {
 	struct kvec *iov = buf->head;
 	unsigned int len = iov->iov_len;
@@ -534,7 +534,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p)
 	xdr->buf = buf;
 	xdr->iov = iov;
 	xdr->p = p;
-	xdr->end = (uint32_t *)((char *)iov->iov_base + len);
+	xdr->end = (__be32 *)((char *)iov->iov_base + len);
 }
 EXPORT_SYMBOL(xdr_init_decode);
 
@@ -548,10 +548,10 @@ EXPORT_SYMBOL(xdr_init_decode);
  * If so return the current pointer, then update the current
  * pointer position.
  */
-uint32_t * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
+__be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
 {
-	uint32_t *p = xdr->p;
-	uint32_t *q = p + XDR_QUADLEN(nbytes);
+	__be32 *p = xdr->p;
+	__be32 *q = p + XDR_QUADLEN(nbytes);
 
 	if (unlikely(q > xdr->end || q < p))
 		return NULL;
@@ -599,8 +599,8 @@ void xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
 	 * Position current pointer at beginning of tail, and
 	 * set remaining message length.
 	 */
-	xdr->p = (uint32_t *)((char *)iov->iov_base + padding);
-	xdr->end = (uint32_t *)((char *)iov->iov_base + end);
+	xdr->p = (__be32 *)((char *)iov->iov_base + padding);
+	xdr->end = (__be32 *)((char *)iov->iov_base + end);
 }
 EXPORT_SYMBOL(xdr_read_pages);
 
@@ -624,8 +624,8 @@ void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
 	 */
 	if (len > PAGE_CACHE_SIZE - xdr->buf->page_base)
 		len = PAGE_CACHE_SIZE - xdr->buf->page_base;
-	xdr->p = (uint32_t *)(kaddr + xdr->buf->page_base);
-	xdr->end = (uint32_t *)((char *)xdr->p + len);
+	xdr->p = (__be32 *)(kaddr + xdr->buf->page_base);
+	xdr->end = (__be32 *)((char *)xdr->p + len);
 }
 EXPORT_SYMBOL(xdr_enter_page);
 
@@ -743,7 +743,7 @@ out:
 int
 xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj)
 {
-	u32	raw;
+	__be32	raw;
 	int	status;
 
 	status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj));
@@ -756,7 +756,7 @@ xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj)
 int
 xdr_encode_word(struct xdr_buf *buf, int base, u32 obj)
 {
-	u32	raw = htonl(obj);
+	__be32	raw = htonl(obj);
 
 	return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj));
 }
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 1f786f68729d..80857470dc11 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -594,7 +594,7 @@ static void xprt_connect_status(struct rpc_task *task)
  * @xid: RPC XID of incoming reply
  *
  */
-struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
+struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
 {
 	struct list_head *pos;
 
@@ -801,7 +801,7 @@ void xprt_reserve(struct rpc_task *task)
 	spin_unlock(&xprt->reserve_lock);
 }
 
-static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt)
+static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
 {
 	return xprt->xid++;
 }
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9b62923a9c06..28100e019225 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -548,7 +548,8 @@ static void xs_udp_data_ready(struct sock *sk, int len)
 	struct rpc_rqst *rovr;
 	struct sk_buff *skb;
 	int err, repsize, copied;
-	u32 _xid, *xp;
+	u32 _xid;
+	__be32 *xp;
 
 	read_lock(&sk->sk_callback_lock);
 	dprintk("RPC:      xs_udp_data_ready...\n");
-- 
cgit v1.2.3


From 126a336822a6594662f5898f1ddf33e6d048fcc7 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Wed, 27 Sep 2006 16:03:07 -0700
Subject: [TG3]: Add 5722 and 5756 support.

Add IDs to support 5722 and 5756.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tg3.c       | 3 +++
 drivers/net/tg3.h       | 4 +++-
 include/linux/pci_ids.h | 2 ++
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 14e964524969..d443b7372325 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -173,6 +173,7 @@ static struct pci_device_id tg3_pci_tbl[] = {
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705F)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5720)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5722)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750M)},
@@ -187,6 +188,7 @@ static struct pci_device_id tg3_pci_tbl[] = {
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754M)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755M)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5756)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5786)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787M)},
@@ -11273,6 +11275,7 @@ static char * __devinit tg3_phy_string(struct tg3 *tp)
 	case PHY_ID_BCM5780:	return "5780";
 	case PHY_ID_BCM5755:	return "5755";
 	case PHY_ID_BCM5787:	return "5787";
+	case PHY_ID_BCM5756:	return "5722/5756";
 	case PHY_ID_BCM8002:	return "8002/serdes";
 	case 0:			return "serdes";
 	default:		return "unknown";
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index f7462c2ccc0a..feed13dc8719 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2282,6 +2282,7 @@ struct tg3 {
 #define PHY_ID_BCM5780			0x60008350
 #define PHY_ID_BCM5755			0xbc050cc0
 #define PHY_ID_BCM5787			0xbc050ce0
+#define PHY_ID_BCM5756			0xbc050ed0
 #define PHY_ID_BCM8002			0x60010140
 #define PHY_ID_INVALID			0xffffffff
 #define PHY_ID_REV_MASK			0x0000000f
@@ -2308,7 +2309,8 @@ struct tg3 {
 	 (X) == PHY_ID_BCM5705 || (X) == PHY_ID_BCM5750 || \
 	 (X) == PHY_ID_BCM5752 || (X) == PHY_ID_BCM5714 || \
 	 (X) == PHY_ID_BCM5780 || (X) == PHY_ID_BCM5787 || \
-	 (X) == PHY_ID_BCM5755 || (X) == PHY_ID_BCM8002)
+	 (X) == PHY_ID_BCM5755 || (X) == PHY_ID_BCM5756 || \
+	 (X) == PHY_ID_BCM8002)
 
 	struct tg3_hw_stats		*hw_stats;
 	dma_addr_t			stats_mapping;
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 61db1907f06f..ea3140d226e6 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1904,6 +1904,7 @@
 #define PCI_DEVICE_ID_TIGON3_5705_2	0x1654
 #define PCI_DEVICE_ID_TIGON3_5720	0x1658
 #define PCI_DEVICE_ID_TIGON3_5721	0x1659
+#define PCI_DEVICE_ID_TIGON3_5722	0x165a
 #define PCI_DEVICE_ID_TIGON3_5705M	0x165d
 #define PCI_DEVICE_ID_TIGON3_5705M_2	0x165e
 #define PCI_DEVICE_ID_TIGON3_5714	0x1668
@@ -1913,6 +1914,7 @@
 #define PCI_DEVICE_ID_TIGON3_5705F	0x166e
 #define PCI_DEVICE_ID_TIGON3_5754M	0x1672
 #define PCI_DEVICE_ID_TIGON3_5755M	0x1673
+#define PCI_DEVICE_ID_TIGON3_5756	0x1674
 #define PCI_DEVICE_ID_TIGON3_5750	0x1676
 #define PCI_DEVICE_ID_TIGON3_5751	0x1677
 #define PCI_DEVICE_ID_TIGON3_5715	0x1678
-- 
cgit v1.2.3


From b5d3772ccbe0bc5ac8ffbb5356b74ca698aee28c Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Wed, 27 Sep 2006 16:06:21 -0700
Subject: [TG3]: Add basic 5906 support.

Add support for the new 5709 device.  This is a new 10/100 Mbps chip.
The mailbox access and firmware interface are quite different from
all other tg3 chips.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tg3.c       | 126 +++++++++++++++++++++++++++++++++++++++++++-----
 drivers/net/tg3.h       |  28 +++++++++--
 include/linux/pci_ids.h |   2 +
 3 files changed, 141 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index eafca2a0dd00..2b062d776511 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -199,6 +199,8 @@ static struct pci_device_id tg3_pci_tbl[] = {
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780S)},
 	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5781)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5906)},
+	{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5906M)},
 	{PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX)},
 	{PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9MXX)},
 	{PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000)},
@@ -426,6 +428,16 @@ static void tg3_write32_tx_mbox(struct tg3 *tp, u32 off, u32 val)
 		readl(mbox);
 }
 
+static u32 tg3_read32_mbox_5906(struct tg3 *tp, u32 off)
+{
+	return (readl(tp->regs + off + GRCMBOX_BASE));
+}
+
+static void tg3_write32_mbox_5906(struct tg3 *tp, u32 off, u32 val)
+{
+	writel(val, tp->regs + off + GRCMBOX_BASE);
+}
+
 #define tw32_mailbox(reg, val)	tp->write32_mbox(tp, reg, val)
 #define tw32_mailbox_f(reg, val)	tw32_mailbox_flush(tp, (reg), (val))
 #define tw32_rx_mbox(reg, val)	tp->write32_rx_mbox(tp, reg, val)
@@ -441,6 +453,10 @@ static void tg3_write_mem(struct tg3 *tp, u32 off, u32 val)
 {
 	unsigned long flags;
 
+	if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) &&
+	    (off >= NIC_SRAM_STATS_BLK) && (off < NIC_SRAM_TX_BUFFER_DESC))
+		return;
+
 	spin_lock_irqsave(&tp->indirect_lock, flags);
 	if (tp->tg3_flags & TG3_FLAG_SRAM_USE_CONFIG) {
 		pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off);
@@ -462,6 +478,12 @@ static void tg3_read_mem(struct tg3 *tp, u32 off, u32 *val)
 {
 	unsigned long flags;
 
+	if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) &&
+	    (off >= NIC_SRAM_STATS_BLK) && (off < NIC_SRAM_TX_BUFFER_DESC)) {
+		*val = 0;
+		return;
+	}
+
 	spin_lock_irqsave(&tp->indirect_lock, flags);
 	if (tp->tg3_flags & TG3_FLAG_SRAM_USE_CONFIG) {
 		pci_write_config_dword(tp->pdev, TG3PCI_MEM_WIN_BASE_ADDR, off);
@@ -491,6 +513,9 @@ static inline void tg3_cond_int(struct tg3 *tp)
 	if (!(tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) &&
 	    (tp->hw_status->status & SD_STATUS_UPDATED))
 		tw32(GRC_LOCAL_CTRL, tp->grc_local_ctrl | GRC_LCLCTRL_SETINT);
+	else
+		tw32(HOSTCC_MODE, tp->coalesce_mode |
+		     (HOSTCC_MODE_ENABLE | HOSTCC_MODE_NOW));
 }
 
 static void tg3_enable_ints(struct tg3 *tp)
@@ -656,6 +681,10 @@ static int tg3_writephy(struct tg3 *tp, int reg, u32 val)
 	unsigned int loops;
 	int ret;
 
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906 &&
+	    (reg == MII_TG3_CTRL || reg == MII_TG3_AUX_CTRL))
+		return 0;
+
 	if ((tp->mi_mode & MAC_MI_MODE_AUTO_POLL) != 0) {
 		tw32_f(MAC_MI_MODE,
 		     (tp->mi_mode & ~MAC_MI_MODE_AUTO_POLL));
@@ -1207,7 +1236,12 @@ static int tg3_set_power_state(struct tg3 *tp, pci_power_t state)
 		tg3_setup_phy(tp, 0);
 	}
 
-	if (!(tp->tg3_flags & TG3_FLAG_ENABLE_ASF)) {
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
+		u32 val;
+
+		val = tr32(GRC_VCPU_EXT_CTRL);
+		tw32(GRC_VCPU_EXT_CTRL, val | GRC_VCPU_EXT_CTRL_DISABLE_WOL);
+	} else if (!(tp->tg3_flags & TG3_FLAG_ENABLE_ASF)) {
 		int i;
 		u32 val;
 
@@ -4667,6 +4701,15 @@ static int tg3_poll_fw(struct tg3 *tp)
 	int i;
 	u32 val;
 
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
+		for (i = 0; i < 400; i++) {
+			if (tr32(VCPU_STATUS) & VCPU_STATUS_INIT_DONE)
+				return 0;
+			udelay(10);
+		}
+		return -ENODEV;
+	}
+
 	/* Wait for firmware initialization to complete. */
 	for (i = 0; i < 100000; i++) {
 		tg3_read_mem(tp, NIC_SRAM_FIRMWARE_MBOX, &val);
@@ -4735,6 +4778,12 @@ static int tg3_chip_reset(struct tg3 *tp)
 		}
 	}
 
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
+		tw32(VCPU_STATUS, tr32(VCPU_STATUS) | VCPU_STATUS_DRV_RESET);
+		tw32(GRC_VCPU_EXT_CTRL,
+		     tr32(GRC_VCPU_EXT_CTRL) & ~GRC_VCPU_EXT_CTRL_HALT_CPU);
+	}
+
 	if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS)
 		val |= GRC_MISC_CFG_KEEP_GPHY_POWER;
 	tw32(GRC_MISC_CFG, val);
@@ -5066,6 +5115,12 @@ static int tg3_halt_cpu(struct tg3 *tp, u32 offset)
 	BUG_ON(offset == TX_CPU_BASE &&
 	    (tp->tg3_flags2 & TG3_FLG2_5705_PLUS));
 
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
+		u32 val = tr32(GRC_VCPU_EXT_CTRL);
+
+		tw32(GRC_VCPU_EXT_CTRL, val | GRC_VCPU_EXT_CTRL_HALT_CPU);
+		return 0;
+	}
 	if (offset == RX_CPU_BASE) {
 		for (i = 0; i < 10000; i++) {
 			tw32(offset + CPU_STATE, 0xffffffff);
@@ -6070,6 +6125,13 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 		val = 1;
 	else if (val > tp->rx_std_max_post)
 		val = tp->rx_std_max_post;
+	else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
+		if (tp->pci_chip_rev_id == CHIPREV_ID_5906_A1)
+			tw32(ISO_PKT_TX, (tr32(ISO_PKT_TX) & ~0x3) | 0x2);
+
+		if (val > (TG3_RX_INTERNAL_RING_SZ_5906 / 2))
+			val = TG3_RX_INTERNAL_RING_SZ_5906 / 2;
+	}
 
 	tw32(RCVBDI_STD_THRESH, val);
 
@@ -6984,9 +7046,10 @@ static int tg3_open(struct net_device *dev)
 
 		if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) {
 			if (tp->tg3_flags2 & TG3_FLG2_1SHOT_MSI) {
-				u32 val = tr32(0x7c04);
+				u32 val = tr32(PCIE_TRANSACTION_CFG);
 
-				tw32(0x7c04, val | (1 << 29));
+				tw32(PCIE_TRANSACTION_CFG,
+				     val | PCIE_TRANS_CFG_1SHOT_MSI);
 			}
 		}
 	}
@@ -7941,7 +8004,8 @@ static int tg3_set_tso(struct net_device *dev, u32 value)
 			return -EINVAL;
 		return 0;
 	}
-	if (tp->tg3_flags2 & TG3_FLG2_HW_TSO_2) {
+	if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO_2) &&
+	    (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906)) {
 		if (value)
 			dev->features |= NETIF_F_TSO6;
 		else
@@ -9257,6 +9321,13 @@ static void __devinit tg3_get_5787_nvram_info(struct tg3 *tp)
 	}
 }
 
+static void __devinit tg3_get_5906_nvram_info(struct tg3 *tp)
+{
+	tp->nvram_jedecnum = JEDEC_ATMEL;
+	tp->tg3_flags |= TG3_FLAG_NVRAM_BUFFERED;
+	tp->nvram_pagesize = ATMEL_AT24C512_CHIP_SIZE;
+}
+
 /* Chips other than 5700/5701 use the NVRAM for fetching info. */
 static void __devinit tg3_nvram_init(struct tg3 *tp)
 {
@@ -9293,6 +9364,8 @@ static void __devinit tg3_nvram_init(struct tg3 *tp)
 			tg3_get_5755_nvram_info(tp);
 		else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787)
 			tg3_get_5787_nvram_info(tp);
+		else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906)
+			tg3_get_5906_nvram_info(tp);
 		else
 			tg3_get_nvram_info(tp);
 
@@ -9766,6 +9839,12 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp)
 	/* Assume an onboard device by default.  */
 	tp->tg3_flags |= TG3_FLAG_EEPROM_WRITE_PROT;
 
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
+		if (!(tr32(PCIE_TRANSACTION_CFG) & PCIE_TRANS_CFG_LOM))
+			tp->tg3_flags &= ~TG3_FLAG_EEPROM_WRITE_PROT;
+		return;
+	}
+
 	tg3_read_mem(tp, NIC_SRAM_DATA_SIG, &val);
 	if (val == NIC_SRAM_DATA_SIG_MAGIC) {
 		u32 nic_cfg, led_cfg;
@@ -10097,7 +10176,10 @@ static void __devinit tg3_read_partno(struct tg3 *tp)
 	}
 
 out_not_found:
-	strcpy(tp->board_part_number, "none");
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906)
+		strcpy(tp->board_part_number, "BCM95906");
+	else
+		strcpy(tp->board_part_number, "none");
 }
 
 static void __devinit tg3_read_fw_ver(struct tg3 *tp)
@@ -10299,6 +10381,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5752 ||
 	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 ||
 	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 ||
+	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906 ||
 	    (tp->tg3_flags2 & TG3_FLG2_5780_CLASS))
 		tp->tg3_flags2 |= TG3_FLG2_5750_PLUS;
 
@@ -10308,7 +10391,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 
 	if (tp->tg3_flags2 & TG3_FLG2_5750_PLUS) {
 		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 ||
-		    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787) {
+		    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 ||
+		    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
 			tp->tg3_flags2 |= TG3_FLG2_HW_TSO_2;
 			tp->tg3_flags2 |= TG3_FLG2_1SHOT_MSI;
 		} else {
@@ -10325,7 +10409,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	    GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750 &&
 	    GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5752 &&
 	    GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5755 &&
-	    GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5787)
+	    GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5787 &&
+	    GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906)
 		tp->tg3_flags2 |= TG3_FLG2_JUMBO_CAPABLE;
 
 	if (pci_find_capability(tp->pdev, PCI_CAP_ID_EXP) != 0)
@@ -10455,6 +10540,12 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 		pci_cmd &= ~PCI_COMMAND_MEMORY;
 		pci_write_config_word(tp->pdev, PCI_COMMAND, pci_cmd);
 	}
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
+		tp->read32_mbox = tg3_read32_mbox_5906;
+		tp->write32_mbox = tg3_write32_mbox_5906;
+		tp->write32_tx_mbox = tg3_write32_mbox_5906;
+		tp->write32_rx_mbox = tg3_write32_mbox_5906;
+	}
 
 	if (tp->write32 == tg3_write_indirect_reg32 ||
 	    ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) &&
@@ -10526,6 +10617,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	    ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) &&
 	     (tp->pci_chip_rev_id != CHIPREV_ID_5705_A0) &&
 	     (tp->pci_chip_rev_id != CHIPREV_ID_5705_A1)) ||
+	    (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) ||
 	    (tp->tg3_flags2 & TG3_FLG2_ANY_SERDES))
 		tp->tg3_flags2 |= TG3_FLG2_NO_ETH_WIRE_SPEED;
 
@@ -10539,7 +10631,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 ||
 		    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787)
 			tp->tg3_flags2 |= TG3_FLG2_PHY_JITTER_BUG;
-		else
+		else if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906)
 			tp->tg3_flags2 |= TG3_FLG2_PHY_BER_BUG;
 	}
 
@@ -10629,7 +10721,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	      tp->pdev->device == PCI_DEVICE_ID_TIGON3_5705F)) ||
 	    (tp->pdev->vendor == PCI_VENDOR_ID_BROADCOM &&
 	     (tp->pdev->device == PCI_DEVICE_ID_TIGON3_5751F ||
-	      tp->pdev->device == PCI_DEVICE_ID_TIGON3_5753F)))
+	      tp->pdev->device == PCI_DEVICE_ID_TIGON3_5753F)) ||
+	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906)
 		tp->tg3_flags |= TG3_FLAG_10_100_ONLY;
 
 	err = tg3_phy_probe(tp);
@@ -10680,7 +10773,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	 * straddle the 4GB address boundary in some cases.
 	 */
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 ||
-	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787)
+	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 ||
+	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906)
 		tp->dev->hard_start_xmit = tg3_start_xmit;
 	else
 		tp->dev->hard_start_xmit = tg3_start_xmit_dma_bug;
@@ -10761,6 +10855,8 @@ static int __devinit tg3_get_device_address(struct tg3 *tp)
 		else
 			tg3_nvram_unlock(tp);
 	}
+	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906)
+		mac_offset = 0x10;
 
 	/* First try to get it from MAC address mailbox. */
 	tg3_read_mem(tp, NIC_SRAM_MAC_ADDR_HIGH_MBOX, &hi);
@@ -11244,6 +11340,12 @@ static void __devinit tg3_init_bufmgr_config(struct tg3 *tp)
 			DEFAULT_MB_MACRX_LOW_WATER_5705;
 		tp->bufmgr_config.mbuf_high_water =
 			DEFAULT_MB_HIGH_WATER_5705;
+		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) {
+			tp->bufmgr_config.mbuf_mac_rx_low_water =
+				DEFAULT_MB_MACRX_LOW_WATER_5906;
+			tp->bufmgr_config.mbuf_high_water =
+				DEFAULT_MB_HIGH_WATER_5906;
+		}
 
 		tp->bufmgr_config.mbuf_read_dma_low_water_jumbo =
 			DEFAULT_MB_RDMA_LOW_WATER_JUMBO_5780;
@@ -11288,6 +11390,7 @@ static char * __devinit tg3_phy_string(struct tg3 *tp)
 	case PHY_ID_BCM5755:	return "5755";
 	case PHY_ID_BCM5787:	return "5787";
 	case PHY_ID_BCM5756:	return "5722/5756";
+	case PHY_ID_BCM5906:	return "5906";
 	case PHY_ID_BCM8002:	return "8002/serdes";
 	case 0:			return "serdes";
 	default:		return "unknown";
@@ -11590,7 +11693,8 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 	 */
 	if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
 		dev->features |= NETIF_F_TSO;
-		if (tp->tg3_flags2 & TG3_FLG2_HW_TSO_2)
+		if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO_2) &&
+		    (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906))
 			dev->features |= NETIF_F_TSO6;
 	}
 
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index feed13dc8719..2f5e00c96016 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -24,6 +24,8 @@
 
 #define RX_COPY_THRESHOLD  		256
 
+#define TG3_RX_INTERNAL_RING_SZ_5906	32
+
 #define RX_STD_MAX_SIZE			1536
 #define RX_STD_MAX_SIZE_5705		512
 #define RX_JUMBO_MAX_SIZE		0xdeadbeef /* XXX */
@@ -129,6 +131,7 @@
 #define  CHIPREV_ID_5752_A0_HW		 0x5000
 #define  CHIPREV_ID_5752_A0		 0x6000
 #define  CHIPREV_ID_5752_A1		 0x6001
+#define  CHIPREV_ID_5906_A1		 0xc001
 #define  GET_ASIC_REV(CHIP_REV_ID)	((CHIP_REV_ID) >> 12)
 #define   ASIC_REV_5700			 0x07
 #define   ASIC_REV_5701			 0x00
@@ -141,6 +144,7 @@
 #define   ASIC_REV_5714			 0x09
 #define   ASIC_REV_5755			 0x0a
 #define   ASIC_REV_5787			 0x0b
+#define   ASIC_REV_5906			 0x0c
 #define  GET_CHIP_REV(CHIP_REV_ID)	((CHIP_REV_ID) >> 8)
 #define   CHIPREV_5700_AX		 0x70
 #define   CHIPREV_5700_BX		 0x71
@@ -646,7 +650,8 @@
 #define  SNDDATAI_SCTRL_FORCE_ZERO	 0x00000010
 #define SNDDATAI_STATSENAB		0x00000c0c
 #define SNDDATAI_STATSINCMASK		0x00000c10
-/* 0xc14 --> 0xc80 unused */
+#define ISO_PKT_TX			0x00000c20
+/* 0xc24 --> 0xc80 unused */
 #define SNDDATAI_COS_CNT_0		0x00000c80
 #define SNDDATAI_COS_CNT_1		0x00000c84
 #define SNDDATAI_COS_CNT_2		0x00000c88
@@ -997,11 +1002,13 @@
 #define BUFMGR_MB_MACRX_LOW_WATER	0x00004414
 #define  DEFAULT_MB_MACRX_LOW_WATER	  0x00000020
 #define  DEFAULT_MB_MACRX_LOW_WATER_5705  0x00000010
+#define  DEFAULT_MB_MACRX_LOW_WATER_5906  0x00000004
 #define  DEFAULT_MB_MACRX_LOW_WATER_JUMBO 0x00000098
 #define  DEFAULT_MB_MACRX_LOW_WATER_JUMBO_5780 0x0000004b
 #define BUFMGR_MB_HIGH_WATER		0x00004418
 #define  DEFAULT_MB_HIGH_WATER		 0x00000060
 #define  DEFAULT_MB_HIGH_WATER_5705	 0x00000060
+#define  DEFAULT_MB_HIGH_WATER_5906	 0x00000010
 #define  DEFAULT_MB_HIGH_WATER_JUMBO	 0x0000017c
 #define  DEFAULT_MB_HIGH_WATER_JUMBO_5780 0x00000096
 #define BUFMGR_RX_MB_ALLOC_REQ		0x0000441c
@@ -1138,7 +1145,12 @@
 #define TX_CPU_STATE			0x00005404
 #define TX_CPU_PGMCTR			0x0000541c
 
+#define VCPU_STATUS			0x00005100
+#define  VCPU_STATUS_INIT_DONE		 0x04000000
+#define  VCPU_STATUS_DRV_RESET		 0x08000000
+
 /* Mailboxes */
+#define GRCMBOX_BASE			0x00005600
 #define GRCMBOX_INTERRUPT_0		0x00005800 /* 64-bit */
 #define GRCMBOX_INTERRUPT_1		0x00005808 /* 64-bit */
 #define GRCMBOX_INTERRUPT_2		0x00005810 /* 64-bit */
@@ -1398,7 +1410,10 @@
 #define GRC_EEPROM_CTRL			0x00006840
 #define GRC_MDI_CTRL			0x00006844
 #define GRC_SEEPROM_DELAY		0x00006848
-/* 0x684c --> 0x6c00 unused */
+/* 0x684c --> 0x6890 unused */
+#define GRC_VCPU_EXT_CTRL		0x00006890
+#define GRC_VCPU_EXT_CTRL_HALT_CPU	 0x00400000
+#define GRC_VCPU_EXT_CTRL_DISABLE_WOL	 0x20000000
 #define GRC_FASTBOOT_PC			0x00006894	/* 5752, 5755, 5787 */
 
 /* 0x6c00 --> 0x7000 unused */
@@ -1485,7 +1500,11 @@
 #define NVRAM_WRITE1			0x00007028
 /* 0x702c --> 0x7400 unused */
 
-/* 0x7400 --> 0x8000 unused */
+/* 0x7400 --> 0x7c00 unused */
+#define PCIE_TRANSACTION_CFG		0x00007c04
+#define PCIE_TRANS_CFG_1SHOT_MSI	 0x20000000
+#define PCIE_TRANS_CFG_LOM		 0x00000020
+
 
 #define TG3_EEPROM_MAGIC		0x669955aa
 
@@ -2283,6 +2302,7 @@ struct tg3 {
 #define PHY_ID_BCM5755			0xbc050cc0
 #define PHY_ID_BCM5787			0xbc050ce0
 #define PHY_ID_BCM5756			0xbc050ed0
+#define PHY_ID_BCM5906			0xdc00ac40
 #define PHY_ID_BCM8002			0x60010140
 #define PHY_ID_INVALID			0xffffffff
 #define PHY_ID_REV_MASK			0x0000000f
@@ -2310,7 +2330,7 @@ struct tg3 {
 	 (X) == PHY_ID_BCM5752 || (X) == PHY_ID_BCM5714 || \
 	 (X) == PHY_ID_BCM5780 || (X) == PHY_ID_BCM5787 || \
 	 (X) == PHY_ID_BCM5755 || (X) == PHY_ID_BCM5756 || \
-	 (X) == PHY_ID_BCM8002)
+	 (X) == PHY_ID_BCM5906 || (X) == PHY_ID_BCM8002)
 
 	struct tg3_hw_stats		*hw_stats;
 	dma_addr_t			stats_mapping;
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index ea3140d226e6..b7e85ff045ea 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1944,6 +1944,8 @@
 #define PCI_DEVICE_ID_TIGON3_5901	0x170d
 #define PCI_DEVICE_ID_BCM4401B1		0x170c
 #define PCI_DEVICE_ID_TIGON3_5901_2	0x170e
+#define PCI_DEVICE_ID_TIGON3_5906	0x1712
+#define PCI_DEVICE_ID_TIGON3_5906M	0x1713
 #define PCI_DEVICE_ID_BCM4401		0x4401
 #define PCI_DEVICE_ID_BCM4401B0		0x4402
 
-- 
cgit v1.2.3


From 00a5020cd51febbb3166ff7a09a2901c47ba251a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:29:47 -0700
Subject: [IPV4]: annotate ipv4 address fields in struct ip_msfilter and struct
 ip_mreq_source

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/in.h b/include/linux/in.h
index bcaca8399aed..d79fc75fa7c2 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -123,17 +123,17 @@ struct ip_mreqn
 };
 
 struct ip_mreq_source {
-	__u32		imr_multiaddr;
-	__u32		imr_interface;
-	__u32		imr_sourceaddr;
+	__be32		imr_multiaddr;
+	__be32		imr_interface;
+	__be32		imr_sourceaddr;
 };
 
 struct ip_msfilter {
-	__u32		imsf_multiaddr;
-	__u32		imsf_interface;
+	__be32		imsf_multiaddr;
+	__be32		imsf_interface;
 	__u32		imsf_fmode;
 	__u32		imsf_numsrc;
-	__u32		imsf_slist[1];
+	__be32		imsf_slist[1];
 };
 
 #define IP_MSFILTER_SIZE(numsrc) \
-- 
cgit v1.2.3


From 8f935bbd7c6c66796c2403aefdab74bb48045bf6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:30:07 -0700
Subject: [IPV4]: ip_mc_{inc,dec}_group() annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h |  4 ++--
 net/ipv4/igmp.c      | 24 ++++++++++++------------
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 8e7eedb3a574..dd49ba9065ae 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -215,7 +215,7 @@ extern void ip_mc_init_dev(struct in_device *);
 extern void ip_mc_destroy_dev(struct in_device *);
 extern void ip_mc_up(struct in_device *);
 extern void ip_mc_down(struct in_device *);
-extern void ip_mc_dec_group(struct in_device *in_dev, u32 addr);
-extern void ip_mc_inc_group(struct in_device *in_dev, u32 addr);
+extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr);
+extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr);
 #endif
 #endif
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d8068c483781..a2e733a82de2 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -144,8 +144,8 @@ static int sf_setstate(struct ip_mc_list *pmc);
 static void sf_markstate(struct ip_mc_list *pmc);
 #endif
 static void ip_mc_clear_src(struct ip_mc_list *pmc);
-static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode,
-			 int sfcount, __u32 *psfsrc, int delta);
+static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+			 int sfcount, __be32 *psfsrc, int delta);
 
 static void ip_ma_put(struct ip_mc_list *im)
 {
@@ -1193,7 +1193,7 @@ static void igmp_group_added(struct ip_mc_list *im)
  *	A socket has joined a multicast group on device dev.
  */
 
-void ip_mc_inc_group(struct in_device *in_dev, u32 addr)
+void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 {
 	struct ip_mc_list *im;
 
@@ -1252,7 +1252,7 @@ out:
  *	A socket has left a multicast group on device dev
  */
 
-void ip_mc_dec_group(struct in_device *in_dev, u32 addr)
+void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 {
 	struct ip_mc_list *i, **ip;
 	
@@ -1402,7 +1402,7 @@ int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
 
 
 static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
-	__u32 *psfsrc)
+	__be32 *psfsrc)
 {
 	struct ip_sf_list *psf, *psf_prev;
 	int rv = 0;
@@ -1450,8 +1450,8 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
 #define igmp_ifc_event(x)	do { } while (0)
 #endif
 
-static int ip_mc_del_src(struct in_device *in_dev, __u32 *pmca, int sfmode,
-			 int sfcount, __u32 *psfsrc, int delta)
+static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+			 int sfcount, __be32 *psfsrc, int delta)
 {
 	struct ip_mc_list *pmc;
 	int	changerec = 0;
@@ -1517,7 +1517,7 @@ out_unlock:
  * Add multicast single-source filter to the interface list
  */
 static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
-	__u32 *psfsrc, int delta)
+	__be32 *psfsrc, int delta)
 {
 	struct ip_sf_list *psf, *psf_prev;
 
@@ -1623,8 +1623,8 @@ static int sf_setstate(struct ip_mc_list *pmc)
 /*
  * Add multicast source filter list to the interface list
  */
-static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode,
-			 int sfcount, __u32 *psfsrc, int delta)
+static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+			 int sfcount, __be32 *psfsrc, int delta)
 {
 	struct ip_mc_list *pmc;
 	int	isexclude;
@@ -1717,7 +1717,7 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
 int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
 {
 	int err;
-	u32 addr = imr->imr_multiaddr.s_addr;
+	__be32 addr = imr->imr_multiaddr.s_addr;
 	struct ip_mc_socklist *iml=NULL, *i;
 	struct in_device *in_dev;
 	struct inet_sock *inet = inet_sk(sk);
@@ -1791,7 +1791,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_mc_socklist *iml, **imlp;
 	struct in_device *in_dev;
-	u32 group = imr->imr_multiaddr.s_addr;
+	__be32 group = imr->imr_multiaddr.s_addr;
 	u32 ifindex;
 	int ret = -EADDRNOTAVAIL;
 
-- 
cgit v1.2.3


From 942bf921e922560c05fde6afb00ddedf6224c608 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:30:28 -0700
Subject: [IPV4]: IGMP on-the-wire data is net-endian

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index dd49ba9065ae..fd207d9b2733 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -30,8 +30,8 @@ struct igmphdr
 {
 	__u8 type;
 	__u8 code;		/* For newer IGMP */
-	__u16 csum;
-	__u32 group;
+	__be16 csum;
+	__be32 group;
 };
 
 /* V3 group record types [grec_type] */
@@ -45,25 +45,25 @@ struct igmphdr
 struct igmpv3_grec {
 	__u8	grec_type;
 	__u8	grec_auxwords;
-	__u16	grec_nsrcs;
-	__u32	grec_mca;
-	__u32	grec_src[0];
+	__be16	grec_nsrcs;
+	__be32	grec_mca;
+	__be32	grec_src[0];
 };
 
 struct igmpv3_report {
 	__u8 type;
 	__u8 resv1;
-	__u16 csum;
-	__u16 resv2;
-	__u16 ngrec;
+	__be16 csum;
+	__be16 resv2;
+	__be16 ngrec;
 	struct igmpv3_grec grec[0];
 };
 
 struct igmpv3_query {
 	__u8 type;
 	__u8 code;
-	__u16 csum;
-	__u32 group;
+	__be16 csum;
+	__be32 group;
 #if defined(__LITTLE_ENDIAN_BITFIELD)
 	__u8 qrv:3,
 	     suppress:1,
@@ -76,8 +76,8 @@ struct igmpv3_query {
 #error "Please fix <asm/byteorder.h>"
 #endif
 	__u8 qqic;
-	__u16 nsrcs;
-	__u32 srcs[0];
+	__be16 nsrcs;
+	__be32 srcs[0];
 };
 
 #define IGMP_HOST_MEMBERSHIP_QUERY	0x11	/* From RFC1112 */
-- 
cgit v1.2.3


From ea4d9e7220d32348cc9742ba6d27de5165262664 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:30:52 -0700
Subject: [IPV4]: struct ip_sf_list and struct ip_sf_socklist annotated

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h | 4 ++--
 net/ipv4/igmp.c      | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index fd207d9b2733..4e9f3fe77cf9 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -136,7 +136,7 @@ struct ip_sf_socklist
 {
 	unsigned int		sl_max;
 	unsigned int		sl_count;
-	__u32			sl_addr[0];
+	__be32			sl_addr[0];
 };
 
 #define IP_SFLSIZE(count)	(sizeof(struct ip_sf_socklist) + \
@@ -159,7 +159,7 @@ struct ip_mc_socklist
 struct ip_sf_list
 {
 	struct ip_sf_list	*sf_next;
-	__u32			sf_inaddr;
+	__be32			sf_inaddr;
 	unsigned long		sf_count[2];	/* include/exclude counts */
 	unsigned char		sf_gsresp;	/* include in g & s response? */
 	unsigned char		sf_oldin;	/* change state */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index a2e733a82de2..f6ca51a2dcc2 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -426,7 +426,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 	first = 1;
 	psf_prev = NULL;
 	for (psf=*psf_list; psf; psf=psf_next) {
-		u32 *psrc;
+		__be32 *psrc;
 
 		psf_next = psf->sf_next;
 
@@ -455,7 +455,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 			skb = add_grhead(skb, pmc, type, &pgr);
 			first = 0;
 		}
-		psrc = (u32 *)skb_put(skb, sizeof(u32));
+		psrc = (__be32 *)skb_put(skb, sizeof(u32));
 		*psrc = psf->sf_inaddr;
 		scount++; stotal++;
 		if ((type == IGMPV3_ALLOW_NEW_SOURCES ||
@@ -748,7 +748,7 @@ static void igmp_timer_expire(unsigned long data)
 }
 
 /* mark EXCLUDE-mode sources */
-static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
+static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
 {
 	struct ip_sf_list *psf;
 	int i, scount;
@@ -775,7 +775,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
 	return 1;
 }
 
-static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs)
+static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
 {
 	struct ip_sf_list *psf;
 	int i, scount;
-- 
cgit v1.2.3


From c0cda068aac3481d40795b115e4fd36f7d386e3a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:31:10 -0700
Subject: [IPV4]: ip_mc_sf_allow() annotated

ip_mc_sf_allow() expects addresses to be passed net-endian.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h | 2 +-
 net/ipv4/igmp.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 4e9f3fe77cf9..7514cceb4fe3 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -209,7 +209,7 @@ extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 		struct ip_msfilter __user *optval, int __user *optlen);
 extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 		struct group_filter __user *optval, int __user *optlen);
-extern int ip_mc_sf_allow(struct sock *sk, u32 local, u32 rmt, int dif);
+extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, int dif);
 extern void ip_mr_init(void);
 extern void ip_mc_init_dev(struct in_device *);
 extern void ip_mc_destroy_dev(struct in_device *);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f6ca51a2dcc2..a2c7b2b1b3fb 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2156,7 +2156,7 @@ done:
 /*
  * check if a multicast source filter allows delivery for a given <src,dst,intf>
  */
-int ip_mc_sf_allow(struct sock *sk, u32 loc_addr, u32 rmt_addr, int dif)
+int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_mc_socklist *pmc;
-- 
cgit v1.2.3


From 63007727e0bb09e8d906f73d36a09b9fac0d5893 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:31:32 -0700
Subject: [IPV4]: trivial igmp annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h |  2 +-
 net/ipv4/igmp.c      | 34 +++++++++++++++++-----------------
 2 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 7514cceb4fe3..03f43e2893a4 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -140,7 +140,7 @@ struct ip_sf_socklist
 };
 
 #define IP_SFLSIZE(count)	(sizeof(struct ip_sf_socklist) + \
-	(count) * sizeof(__u32))
+	(count) * sizeof(__be32))
 
 #define IP_SFBLOCK	10	/* allocate this many at once */
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index a2c7b2b1b3fb..6eee71647b7c 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -138,7 +138,7 @@
 		time_before(jiffies, (in_dev)->mr_v2_seen)))
 
 static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im);
-static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr);
+static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr);
 static void igmpv3_clear_delrec(struct in_device *in_dev);
 static int sf_setstate(struct ip_mc_list *pmc);
 static void sf_markstate(struct ip_mc_list *pmc);
@@ -439,7 +439,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 		if (isquery)
 			psf->sf_gsresp = 0;
 
-		if (AVAILABLE(skb) < sizeof(u32) +
+		if (AVAILABLE(skb) < sizeof(__be32) +
 		    first*sizeof(struct igmpv3_grec)) {
 			if (truncate && !first)
 				break;	 /* truncate these */
@@ -455,7 +455,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 			skb = add_grhead(skb, pmc, type, &pgr);
 			first = 0;
 		}
-		psrc = (__be32 *)skb_put(skb, sizeof(u32));
+		psrc = (__be32 *)skb_put(skb, sizeof(__be32));
 		*psrc = psf->sf_inaddr;
 		scount++; stotal++;
 		if ((type == IGMPV3_ALLOW_NEW_SOURCES ||
@@ -630,8 +630,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	struct igmphdr *ih;
 	struct rtable *rt;
 	struct net_device *dev = in_dev->dev;
-	u32	group = pmc ? pmc->multiaddr : 0;
-	u32	dst;
+	__be32	group = pmc ? pmc->multiaddr : 0;
+	__be32	dst;
 
 	if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
 		return igmpv3_send_report(in_dev, pmc);
@@ -803,7 +803,7 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
 	return 1;
 }
 
-static void igmp_heard_report(struct in_device *in_dev, u32 group)
+static void igmp_heard_report(struct in_device *in_dev, __be32 group)
 {
 	struct ip_mc_list *im;
 
@@ -828,7 +828,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 	struct igmphdr 		*ih = skb->h.igmph;
 	struct igmpv3_query *ih3 = (struct igmpv3_query *)ih;
 	struct ip_mc_list	*im;
-	u32			group = ih->group;
+	__be32			group = ih->group;
 	int			max_delay;
 	int			mark = 0;
 
@@ -862,7 +862,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 		ih3 = (struct igmpv3_query *) skb->h.raw;
 		if (ih3->nsrcs) {
 			if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) 
-					   + ntohs(ih3->nsrcs)*sizeof(__u32)))
+					   + ntohs(ih3->nsrcs)*sizeof(__be32)))
 				return;
 			ih3 = (struct igmpv3_query *) skb->h.raw;
 		}
@@ -985,7 +985,7 @@ drop:
  *	Add a filter to a device
  */
 
-static void ip_mc_filter_add(struct in_device *in_dev, u32 addr)
+static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr)
 {
 	char buf[MAX_ADDR_LEN];
 	struct net_device *dev = in_dev->dev;
@@ -1005,7 +1005,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, u32 addr)
  *	Remove a filter from a device
  */
 
-static void ip_mc_filter_del(struct in_device *in_dev, u32 addr)
+static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr)
 {
 	char buf[MAX_ADDR_LEN];
 	struct net_device *dev = in_dev->dev;
@@ -1055,7 +1055,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 	spin_unlock_bh(&in_dev->mc_tomb_lock);
 }
 
-static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr)
+static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr)
 {
 	struct ip_mc_list *pmc, *pmc_prev;
 	struct ip_sf_list *psf, *psf_next;
@@ -1829,7 +1829,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 {
 	int err;
 	struct ip_mreqn imr;
-	u32 addr = mreqs->imr_multiaddr;
+	__be32 addr = mreqs->imr_multiaddr;
 	struct ip_mc_socklist *pmc;
 	struct in_device *in_dev = NULL;
 	struct inet_sock *inet = inet_sk(sk);
@@ -1883,7 +1883,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 		rv = !0;
 		for (i=0; i<psl->sl_count; i++) {
 			rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr,
-				sizeof(__u32));
+				sizeof(__be32));
 			if (rv == 0)
 				break;
 		}
@@ -1935,7 +1935,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 	rv = 1;	/* > 0 for insert logic below if sl_count is 0 */
 	for (i=0; i<psl->sl_count; i++) {
 		rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr,
-			sizeof(__u32));
+			sizeof(__be32));
 		if (rv == 0)
 			break;
 	}
@@ -1960,7 +1960,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
 {
 	int err = 0;
 	struct ip_mreqn	imr;
-	u32 addr = msf->imsf_multiaddr;
+	__be32 addr = msf->imsf_multiaddr;
 	struct ip_mc_socklist *pmc;
 	struct in_device *in_dev;
 	struct inet_sock *inet = inet_sk(sk);
@@ -2044,7 +2044,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 {
 	int err, len, count, copycount;
 	struct ip_mreqn	imr;
-	u32 addr = msf->imsf_multiaddr;
+	__be32 addr = msf->imsf_multiaddr;
 	struct ip_mc_socklist *pmc;
 	struct in_device *in_dev;
 	struct inet_sock *inet = inet_sk(sk);
@@ -2103,7 +2103,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 {
 	int err, i, count, copycount;
 	struct sockaddr_in *psin;
-	u32 addr;
+	__be32 addr;
 	struct ip_mc_socklist *pmc;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_sf_socklist *psl;
-- 
cgit v1.2.3


From 46a97324a5ebdc1e343a0223d993e79551adab0f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:31:51 -0700
Subject: [IPV4]: TCP headers annotated

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 8ebf497907f8..543f06371840 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -21,10 +21,10 @@
 #include <asm/byteorder.h>
 
 struct tcphdr {
-	__u16	source;
-	__u16	dest;
-	__u32	seq;
-	__u32	ack_seq;
+	__be16	source;
+	__be16	dest;
+	__be32	seq;
+	__be32	ack_seq;
 #if defined(__LITTLE_ENDIAN_BITFIELD)
 	__u16	res1:4,
 		doff:4,
@@ -50,9 +50,9 @@ struct tcphdr {
 #else
 #error	"Adjust your <asm/byteorder.h> defines"
 #endif	
-	__u16	window;
-	__u16	check;
-	__u16	urg_ptr;
+	__be16	window;
+	__be16	check;
+	__be16	urg_ptr;
 };
 
 /*
@@ -62,7 +62,7 @@ struct tcphdr {
  */
 union tcp_word_hdr { 
 	struct tcphdr hdr;
-	__u32 		  words[5];
+	__be32 		  words[5];
 }; 
 
 #define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) 
-- 
cgit v1.2.3


From 269bd27e66037a7932cee6d6aa7ef7defd0bfe38 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:32:28 -0700
Subject: [TCP]: struct tcp_sack_block annotations

Some of the instances of tcp_sack_block are host-endian, some - net-endian.
Define struct tcp_sack_block_wire identical to struct tcp_sack_block
with u32 replaced with __be32; annotate uses of tcp_sack_block replacing
net-endian ones with tcp_sack_block_wire.  Change is obviously safe since
for cc(1) __be32 is typedefed to u32.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h                | 5 +++++
 net/ipv4/netfilter/ip_nat_helper.c | 2 +-
 net/ipv4/tcp_input.c               | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 543f06371840..9632aa866de4 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -166,6 +166,11 @@ struct tcp_info
 #include <net/inet_timewait_sock.h>
 
 /* This defines a selective acknowledgement block. */
+struct tcp_sack_block_wire {
+	__be32	start_seq;
+	__be32	end_seq;
+};
+
 struct tcp_sack_block {
 	__u32	start_seq;
 	__u32	end_seq;
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index 7f6a75984f6c..e9c5187ea5b2 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -283,7 +283,7 @@ sack_adjust(struct sk_buff *skb,
 	    struct ip_nat_seq *natseq)
 {
 	while (sackoff < sackend) {
-		struct tcp_sack_block *sack;
+		struct tcp_sack_block_wire *sack;
 		u_int32_t new_start_seq, new_end_seq;
 
 		sack = (void *)skb->data + sackoff;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b3def0df14fb..46984ffd73cd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -935,7 +935,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
-	struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
+	struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
 	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
 	int reord = tp->packets_out;
 	int prior_fackets;
-- 
cgit v1.2.3


From dddc93c05d7dba60b44866486502c155e96ab915 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:32:46 -0700
Subject: [TCP]: struct tcp_sock .pred_flags is net-endian

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 9632aa866de4..0e058a2d1c6d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -216,7 +216,7 @@ struct tcp_sock {
  *	Header prediction flags
  *	0x5?10 << 16 + snd_wnd in net byte order
  */
-	__u32	pred_flags;
+	__be32	pred_flags;
 
 /*
  *	RFC793 variables by their proper names. This means you can
-- 
cgit v1.2.3


From b406313c733156c8eea7d9c1891476f400914367 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:34:02 -0700
Subject: [NET]: struct sock_exterr_skb annotations

->port is net-endian

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/errqueue.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h
index 408118a07763..92f8d4fab32b 100644
--- a/include/linux/errqueue.h
+++ b/include/linux/errqueue.h
@@ -38,7 +38,7 @@ struct sock_exterr_skb
 	} header;
 	struct sock_extended_err	ee;
 	u16				addr_offset;
-	u16				port;
+	__be16				port;
 };
 
 #endif
-- 
cgit v1.2.3


From bd6d610a14f2ed896b76dfb61fbdec829e44b8d3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:35:47 -0700
Subject: [IPV4]: ARP header annotated

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_arp.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index a8b1a2071838..7f5714214ee3 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -130,11 +130,11 @@ struct arpreq_old {
 
 struct arphdr
 {
-	unsigned short	ar_hrd;		/* format of hardware address	*/
-	unsigned short	ar_pro;		/* format of protocol address	*/
+	__be16		ar_hrd;		/* format of hardware address	*/
+	__be16		ar_pro;		/* format of protocol address	*/
 	unsigned char	ar_hln;		/* length of hardware address	*/
 	unsigned char	ar_pln;		/* length of protocol address	*/
-	unsigned short	ar_op;		/* ARP opcode (command)		*/
+	__be16		ar_op;		/* ARP opcode (command)		*/
 
 #if 0
 	 /*
-- 
cgit v1.2.3


From 4e7e0c7592cafe5453e5b2f115fc0065d11b3d44 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:37:19 -0700
Subject: [IPV4]: UDP header annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 90223f057d50..014b41d1e308 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -20,10 +20,10 @@
 #include <linux/types.h>
 
 struct udphdr {
-	__u16	source;
-	__u16	dest;
-	__u16	len;
-	__u16	check;
+	__be16	source;
+	__be16	dest;
+	__be16	len;
+	__be16	check;
 };
 
 /* UDP socket options */
-- 
cgit v1.2.3


From b1dd39ac963040c2d282ab8026b9c9aa9306ea06 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:38:13 -0700
Subject: [IPV4]: ICMP header annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/icmp.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/icmp.h b/include/linux/icmp.h
index f0b571f1060b..878cfe4e587f 100644
--- a/include/linux/icmp.h
+++ b/include/linux/icmp.h
@@ -68,16 +68,16 @@
 struct icmphdr {
   __u8		type;
   __u8		code;
-  __u16		checksum;
+  __be16	checksum;
   union {
 	struct {
-		__u16	id;
-		__u16	sequence;
+		__be16	id;
+		__be16	sequence;
 	} echo;
-	__u32	gateway;
+	__be32	gateway;
 	struct {
-		__u16	__unused;
-		__u16	mtu;
+		__be16	__unused;
+		__be16	mtu;
 	} frag;
   } un;
 };
-- 
cgit v1.2.3


From 1b620154273d5cc57690e0d199282c6bb9e56974 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:39:09 -0700
Subject: [IPV4]: PIMv2 header annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index e05d54a90743..c7dd4c11f667 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -213,8 +213,8 @@ struct pimreghdr
 {
 	__u8	type;
 	__u8	reserved;
-	__u16	csum;
-	__u32	flags;
+	__be16	csum;
+	__be32	flags;
 };
 
 extern int pim_rcv_v1(struct sk_buff *);
-- 
cgit v1.2.3


From 114c7844f34c1608aec20ae7ff85cec471ac90ae Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:39:29 -0700
Subject: [IPV4]: mroute annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h | 10 +++++-----
 net/ipv4/ipmr.c        |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index c7dd4c11f667..7da2cee8e132 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -142,7 +142,7 @@ struct vif_device
 	unsigned long	rate_limit;		/* Traffic shaping (NI) 	*/
 	unsigned char	threshold;		/* TTL threshold 		*/
 	unsigned short	flags;			/* Control flags 		*/
-	__u32		local,remote;		/* Addresses(remote for tunnels)*/
+	__be32		local,remote;		/* Addresses(remote for tunnels)*/
 	int		link;			/* Physical interface index	*/
 };
 
@@ -151,8 +151,8 @@ struct vif_device
 struct mfc_cache 
 {
 	struct mfc_cache *next;			/* Next entry on cache line 	*/
-	__u32 mfc_mcastgrp;			/* Group the entry belongs to 	*/
-	__u32 mfc_origin;			/* Source of packet 		*/
+	__be32 mfc_mcastgrp;			/* Group the entry belongs to 	*/
+	__be32 mfc_origin;			/* Source of packet 		*/
 	vifi_t mfc_parent;			/* Source interface		*/
 	int mfc_flags;				/* Flags on line		*/
 
@@ -179,9 +179,9 @@ struct mfc_cache
 #define MFC_LINES		64
 
 #ifdef __BIG_ENDIAN
-#define MFC_HASH(a,b)	((((a)>>24)^((b)>>26))&(MFC_LINES-1))
+#define MFC_HASH(a,b)	(((((__force u32)(__be32)a)>>24)^(((__force u32)(__be32)b)>>26))&(MFC_LINES-1))
 #else
-#define MFC_HASH(a,b)	(((a)^((b)>>2))&(MFC_LINES-1))
+#define MFC_HASH(a,b)	((((__force u32)(__be32)a)^(((__force u32)(__be32)b)>>2))&(MFC_LINES-1))
 #endif		
 
 #endif
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index ba49588da242..97cfa97c8abb 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -462,7 +462,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 	return 0;
 }
 
-static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
+static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
 {
 	int line=MFC_HASH(mcastgrp,origin);
 	struct mfc_cache *c;
@@ -1097,7 +1097,7 @@ static struct notifier_block ip_mr_notifier={
  *	important for multicast video.
  */
  
-static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
+static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 {
 	struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
 
-- 
cgit v1.2.3


From 4f765d842fa6e6fe15d555b247b640118d65b4dd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:43:07 -0700
Subject: [IPV4]: INET_MATCH() annotations

INET_MATCH() and friends depend on an interesting set of kludges:
	* there's a pair of adjacent fields in struct inet_sock - __be16 dport
followed by __u16 num.  We want to search by pair, so we combine the keys into
a single 32bit value and compare with 32bit value read from &...->dport.
	* on 64bit targets we combine comparisons with pair of adjacent __be32
fields in the same way.

Make sure that we don't mix those values with anything else and that pairs
we form them from have correct types.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h          |  2 +-
 include/net/inet_hashtables.h | 36 +++++++++++++++++++++++++-----------
 net/ipv4/inet_hashtables.c    |  2 +-
 net/ipv6/inet6_hashtables.c   |  8 ++++----
 4 files changed, 31 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index caca57df0d7d..6dc07ee7702e 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -461,7 +461,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 
 #define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash))				&& \
-	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))  	&& \
+	 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports))  	&& \
 	 ((__sk)->sk_family		== AF_INET6)		&& \
 	 ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr))	&& \
 	 ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr))	&& \
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index b4491c9e2a5a..fb0c09c7090c 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -283,31 +283,45 @@ static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo,
 }
 
 /* Socket demux engine toys. */
+/* What happens here is ugly; there's a pair of adjacent fields in
+   struct inet_sock; __be16 dport followed by __u16 num.  We want to
+   search by pair, so we combine the keys into a single 32bit value
+   and compare with 32bit value read from &...->dport.  Let's at least
+   make sure that it's not mixed with anything else...
+   On 64bit targets we combine comparisons with pair of adjacent __be32
+   fields in the same way.
+*/
+typedef __u32 __bitwise __portpair;
 #ifdef __BIG_ENDIAN
 #define INET_COMBINED_PORTS(__sport, __dport) \
-	(((__u32)(__sport) << 16) | (__u32)(__dport))
+	((__force __portpair)(((__force __u32)(__be16)(__sport) << 16) | (__u32)(__dport)))
 #else /* __LITTLE_ENDIAN */
 #define INET_COMBINED_PORTS(__sport, __dport) \
-	(((__u32)(__dport) << 16) | (__u32)(__sport))
+	((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport)))
 #endif
 
 #if (BITS_PER_LONG == 64)
+typedef __u64 __bitwise __addrpair;
 #ifdef __BIG_ENDIAN
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
-	const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr));
+	const __addrpair __name = (__force __addrpair) ( \
+				   (((__force __u64)(__be32)(__saddr)) << 32) | \
+				   ((__force __u64)(__be32)(__daddr)));
 #else /* __LITTLE_ENDIAN */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
-	const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr));
+	const __addrpair __name = (__force __addrpair) ( \
+				   (((__force __u64)(__be32)(__daddr)) << 32) | \
+				   ((__force __u64)(__be32)(__saddr)));
 #endif /* __BIG_ENDIAN */
 #define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash))				&&	\
-	 ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie))	&&	\
-	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
+	 ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie))	&&	\
+	 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash))				&&	\
-	 ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) &&	\
-	 ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
+	 ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) &&	\
+	 ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #else /* 32-bit arch */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
@@ -315,13 +329,13 @@ static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo,
 	(((__sk)->sk_hash == (__hash))				&&	\
 	 (inet_sk(__sk)->daddr		== (__saddr))		&&	\
 	 (inet_sk(__sk)->rcv_saddr	== (__daddr))		&&	\
-	 ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
+	 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_hash == (__hash))				&&	\
 	 (inet_twsk(__sk)->tw_daddr	== (__saddr))		&&	\
 	 (inet_twsk(__sk)->tw_rcv_saddr	== (__daddr))		&&	\
-	 ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
+	 ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
 	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 #endif /* 64-bit arch */
 
@@ -338,7 +352,7 @@ static inline struct sock *
 				  const int dif)
 {
 	INET_ADDR_COOKIE(acookie, saddr, daddr)
-	const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
+	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
 	struct sock *sk;
 	const struct hlist_node *node;
 	/* Optimize here for direct hit, only listening connections can
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fb296c9a7f3f..729d1eb39483 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -201,7 +201,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	u32 saddr = inet->daddr;
 	int dif = sk->sk_bound_dev_if;
 	INET_ADDR_COOKIE(acookie, saddr, daddr)
-	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+	const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
 	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
 	struct sock *sk2;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index d2f3fc990bfa..8accd1fbeeda 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -64,7 +64,7 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo,
 {
 	struct sock *sk;
 	const struct hlist_node *node;
-	const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
+	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
@@ -82,7 +82,7 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo,
 	sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
 		const struct inet_timewait_sock *tw = inet_twsk(sk);
 
-		if(*((__u32 *)&(tw->tw_dport))	== ports	&&
+		if(*((__portpair *)&(tw->tw_dport))	== ports	&&
 		   sk->sk_family		== PF_INET6) {
 			const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
 
@@ -171,7 +171,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	const struct in6_addr *daddr = &np->rcv_saddr;
 	const struct in6_addr *saddr = &np->daddr;
 	const int dif = sk->sk_bound_dev_if;
-	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+	const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
 	const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr,
 						inet->dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
@@ -188,7 +188,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 
 		tw = inet_twsk(sk2);
 
-		if(*((__u32 *)&(tw->tw_dport)) == ports		 &&
+		if(*((__portpair *)&(tw->tw_dport)) == ports		 &&
 		   sk2->sk_family	       == PF_INET6	 &&
 		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	 &&
 		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
-- 
cgit v1.2.3


From 9f8552996d969f56039ec88128cf5ad35b12f141 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:44:30 -0700
Subject: [IPV4]: inet_diag annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 10 +++++-----
 net/ipv4/inet_diag.c      | 12 ++++++------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index a4606e5810e5..6e8bc548635a 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -9,10 +9,10 @@
 
 /* Socket identity */
 struct inet_diag_sockid {
-	__u16	idiag_sport;
-	__u16	idiag_dport;
-	__u32	idiag_src[4];
-	__u32	idiag_dst[4];
+	__be16	idiag_sport;
+	__be16	idiag_dport;
+	__be32	idiag_src[4];
+	__be32	idiag_dst[4];
 	__u32	idiag_if;
 	__u32	idiag_cookie[2];
 #define INET_DIAG_NOCOOKIE (~0U)
@@ -67,7 +67,7 @@ struct inet_diag_hostcond {
 	__u8	family;
 	__u8	prefix_len;
 	int	port;
-	__u32	addr[0];
+	__be32	addr[0];
 };
 
 /* Base info structure. It contains socket identity (addrs/ports/cookie)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 492858e6faf0..77761ac4f7bb 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -36,8 +36,8 @@
 static const struct inet_diag_handler **inet_diag_table;
 
 struct inet_diag_entry {
-	u32 *saddr;
-	u32 *daddr;
+	__be32 *saddr;
+	__be32 *daddr;
 	u16 sport;
 	u16 dport;
 	u16 family;
@@ -294,7 +294,7 @@ out:
 	return err;
 }
 
-static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
+static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits)
 {
 	int words = bits >> 5;
 
@@ -305,8 +305,8 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
 			return 0;
 	}
 	if (bits) {
-		__u32 w1, w2;
-		__u32 mask;
+		__be32 w1, w2;
+		__be32 mask;
 
 		w1 = a1[words];
 		w2 = a2[words];
@@ -352,7 +352,7 @@ static int inet_diag_bc_run(const void *bc, int len,
 		case INET_DIAG_BC_S_COND:
 		case INET_DIAG_BC_D_COND: {
 			struct inet_diag_hostcond *cond;
-			u32 *addr;
+			__be32 *addr;
 
 			cond = (struct inet_diag_hostcond *)(op + 1);
 			if (cond->port != -1 &&
-- 
cgit v1.2.3


From 48818f822d2b2e16f4bf4d1ed1185e7d2146dc34 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:44:54 -0700
Subject: [IPV6]: struct in6_addr annotations

in6_addr elements are net-endian

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in6.h | 4 ++--
 include/net/ipv6.h  | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/in6.h b/include/linux/in6.h
index d776829b443f..348ecd4a36fe 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -32,8 +32,8 @@ struct in6_addr
 	union 
 	{
 		__u8		u6_addr8[16];
-		__u16		u6_addr16[8];
-		__u32		u6_addr32[4];
+		__be16		u6_addr16[8];
+		__be32		u6_addr32[4];
 	} in6_u;
 #define s6_addr			in6_u.u6_addr8
 #define s6_addr16		in6_u.u6_addr16
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 72bf47b2a4e0..8223c4410b4b 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -318,8 +318,8 @@ static inline void ipv6_addr_prefix(struct in6_addr *pfx,
 
 #ifndef __HAVE_ARCH_ADDR_SET
 static inline void ipv6_addr_set(struct in6_addr *addr, 
-				     __u32 w1, __u32 w2,
-				     __u32 w3, __u32 w4)
+				     __be32 w1, __be32 w2,
+				     __be32 w3, __be32 w4)
 {
 	addr->s6_addr32[0] = w1;
 	addr->s6_addr32[1] = w2;
@@ -337,7 +337,7 @@ static inline int ipv6_addr_equal(const struct in6_addr *a1,
 		a1->s6_addr32[3] == a2->s6_addr32[3]);
 }
 
-static inline int __ipv6_prefix_equal(const u32 *a1, const u32 *a2,
+static inline int __ipv6_prefix_equal(const __be32 *a1, const __be32 *a2,
 				      unsigned int prefixlen)
 {
 	unsigned pdw, pbi;
-- 
cgit v1.2.3


From 43505077df075545e9b28b7c6ea12d82b3caf036 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:45:11 -0700
Subject: [IPV6]: IPv6 headers annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 6dc07ee7702e..4f435c59de06 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -99,22 +99,22 @@ struct ipv6_destopt_hao {
 struct ipv6_auth_hdr {
 	__u8  nexthdr;
 	__u8  hdrlen;           /* This one is measured in 32 bit units! */
-	__u16 reserved;
-	__u32 spi;
-	__u32 seq_no;           /* Sequence number */
+	__be16 reserved;
+	__be32 spi;
+	__be32 seq_no;           /* Sequence number */
 	__u8  auth_data[0];     /* Length variable but >=4. Mind the 64 bit alignment! */
 };
 
 struct ipv6_esp_hdr {
-	__u32 spi;
-	__u32 seq_no;           /* Sequence number */
+	__be32 spi;
+	__be32 seq_no;           /* Sequence number */
 	__u8  enc_data[0];      /* Length variable but >=8. Mind the 64 bit alignment! */
 };
 
 struct ipv6_comp_hdr {
 	__u8 nexthdr;
 	__u8 flags;
-	__u16 cpi;
+	__be16 cpi;
 };
 
 /*
@@ -136,7 +136,7 @@ struct ipv6hdr {
 #endif
 	__u8			flow_lbl[3];
 
-	__u16			payload_len;
+	__be16			payload_len;
 	__u8			nexthdr;
 	__u8			hop_limit;
 
-- 
cgit v1.2.3


From e2e38e819bd03e9674c102aaa2c9dc8151a3c3d0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:45:27 -0700
Subject: [IPV6]: sin6_port is net-endian

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/in6.h b/include/linux/in6.h
index 348ecd4a36fe..9be6a4756f0b 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -53,7 +53,7 @@ extern const struct in6_addr in6addr_loopback;
 
 struct sockaddr_in6 {
 	unsigned short int	sin6_family;    /* AF_INET6 */
-	__u16			sin6_port;      /* Transport layer port # */
+	__be16			sin6_port;      /* Transport layer port # */
 	__u32			sin6_flowinfo;  /* IPv6 flow information */
 	struct in6_addr		sin6_addr;      /* IPv6 address */
 	__u32			sin6_scope_id;  /* scope id (new in RFC2553) */
-- 
cgit v1.2.3


From 8f83f23e6db8b9a9fe787d02f73489224668c4e2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:46:11 -0700
Subject: [XFRM]: ports in struct xfrm_selector annotated

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h   | 8 ++++----
 net/ipv4/xfrm4_state.c | 4 ++--
 net/ipv6/xfrm6_state.c | 4 ++--
 net/key/af_key.c       | 8 ++++----
 4 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 14ecd19f4cdc..3aae9b9ce79b 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -49,10 +49,10 @@ struct xfrm_selector
 {
 	xfrm_address_t	daddr;
 	xfrm_address_t	saddr;
-	__u16	dport;
-	__u16	dport_mask;
-	__u16	sport;
-	__u16	sport_mask;
+	__be16	dport;
+	__be16	dport_mask;
+	__be16	sport;
+	__be16	sport_mask;
 	__u16	family;
 	__u8	prefixlen_d;
 	__u8	prefixlen_s;
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index fe2034494d08..3cc3df0c6ece 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -29,9 +29,9 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	x->sel.daddr.a4 = fl->fl4_dst;
 	x->sel.saddr.a4 = fl->fl4_src;
 	x->sel.dport = xfrm_flowi_dport(fl);
-	x->sel.dport_mask = ~0;
+	x->sel.dport_mask = htons(0xffff);
 	x->sel.sport = xfrm_flowi_sport(fl);
-	x->sel.sport_mask = ~0;
+	x->sel.sport_mask = htons(0xffff);
 	x->sel.prefixlen_d = 32;
 	x->sel.prefixlen_s = 32;
 	x->sel.proto = fl->proto;
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 711bfafb2472..9ddaa9d41539 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -29,9 +29,9 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 	ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst);
 	ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src);
 	x->sel.dport = xfrm_flowi_dport(fl);
-	x->sel.dport_mask = ~0;
+	x->sel.dport_mask = htons(0xffff);
 	x->sel.sport = xfrm_flowi_sport(fl);
-	x->sel.sport_mask = ~0;
+	x->sel.sport_mask = htons(0xffff);
 	x->sel.prefixlen_d = 128;
 	x->sel.prefixlen_s = 128;
 	x->sel.proto = fl->proto;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 83b443ddc72f..ff98e70b0931 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2140,7 +2140,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
 	xp->selector.sport = ((struct sockaddr_in *)(sa+1))->sin_port;
 	if (xp->selector.sport)
-		xp->selector.sport_mask = ~0;
+		xp->selector.sport_mask = htons(0xffff);
 
 	sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1], 
 	pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.daddr);
@@ -2153,7 +2153,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 
 	xp->selector.dport = ((struct sockaddr_in *)(sa+1))->sin_port;
 	if (xp->selector.dport)
-		xp->selector.dport_mask = ~0;
+		xp->selector.dport_mask = htons(0xffff);
 
 	sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
 	if (sec_ctx != NULL) {
@@ -2243,7 +2243,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 	sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
 	sel.sport = ((struct sockaddr_in *)(sa+1))->sin_port;
 	if (sel.sport)
-		sel.sport_mask = ~0;
+		sel.sport_mask = htons(0xffff);
 
 	sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1], 
 	pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr);
@@ -2251,7 +2251,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 	sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
 	sel.dport = ((struct sockaddr_in *)(sa+1))->sin_port;
 	if (sel.dport)
-		sel.dport_mask = ~0;
+		sel.dport_mask = htons(0xffff);
 
 	sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
 	memset(&tmp, 0, sizeof(struct xfrm_policy));
-- 
cgit v1.2.3


From 737b5761dfc609b5be4163deb2cf09226f56bcbc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:46:48 -0700
Subject: [XFRM]: xfrm_address_t annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 3aae9b9ce79b..4b321842d65f 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -12,8 +12,8 @@
  */
 typedef union
 {
-	__u32		a4;
-	__u32		a6[4];
+	__be32		a4;
+	__be32		a6[4];
 } xfrm_address_t;
 
 /* Ident of a specific xfrm_state. It is used on input to lookup
-- 
cgit v1.2.3


From e037c39bf965ca66fde902e191d849a90de278fe Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:47:40 -0700
Subject: [XFRM]: struct xfrm_id annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 4b321842d65f..c894267ff5dd 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -23,7 +23,7 @@ typedef union
 struct xfrm_id
 {
 	xfrm_address_t	daddr;
-	__u32		spi;
+	__be32		spi;
 	__u8		proto;
 };
 
-- 
cgit v1.2.3


From 9916ecb0a6f52f1475fa2f71845227d3c75fb40c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Sep 2006 18:48:48 -0700
Subject: [XFRM]: struct xfrm_usersa_id annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index c894267ff5dd..430afd058269 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -281,7 +281,7 @@ struct xfrm_usersa_info {
 
 struct xfrm_usersa_id {
 	xfrm_address_t			daddr;
-	__u32				spi;
+	__be32				spi;
 	__u16				family;
 	__u8				proto;
 };
-- 
cgit v1.2.3


From cbde1668e4f08e0a150207646010bc65e1e2a42b Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 27 Sep 2006 22:40:19 -0700
Subject: [NET]: Move netlink interface bits to linux/if_link.h.

Moving netlink interface bits to linux/if.h is rather troublesome for
applications including both linux/if.h (which was changed to be included
from linux/rtnetlink.h automatically) and net/if.h.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if.h        | 130 --------------------------------------------
 include/linux/if_link.h   | 136 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/rtnetlink.h |   2 +-
 3 files changed, 137 insertions(+), 131 deletions(-)
 create mode 100644 include/linux/if_link.h

(limited to 'include/linux')

diff --git a/include/linux/if.h b/include/linux/if.h
index 8018c2e22c0c..32bf419351f1 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -214,134 +214,4 @@ struct ifconf
 #define	ifc_buf	ifc_ifcu.ifcu_buf		/* buffer address	*/
 #define	ifc_req	ifc_ifcu.ifcu_req		/* array of structures	*/
 
-/* The struct should be in sync with struct net_device_stats */
-struct rtnl_link_stats
-{
-	__u32	rx_packets;		/* total packets received	*/
-	__u32	tx_packets;		/* total packets transmitted	*/
-	__u32	rx_bytes;		/* total bytes received 	*/
-	__u32	tx_bytes;		/* total bytes transmitted	*/
-	__u32	rx_errors;		/* bad packets received		*/
-	__u32	tx_errors;		/* packet transmit problems	*/
-	__u32	rx_dropped;		/* no space in linux buffers	*/
-	__u32	tx_dropped;		/* no space available in linux	*/
-	__u32	multicast;		/* multicast packets received	*/
-	__u32	collisions;
-
-	/* detailed rx_errors: */
-	__u32	rx_length_errors;
-	__u32	rx_over_errors;		/* receiver ring buff overflow	*/
-	__u32	rx_crc_errors;		/* recved pkt with crc error	*/
-	__u32	rx_frame_errors;	/* recv'd frame alignment error */
-	__u32	rx_fifo_errors;		/* recv'r fifo overrun		*/
-	__u32	rx_missed_errors;	/* receiver missed packet	*/
-
-	/* detailed tx_errors */
-	__u32	tx_aborted_errors;
-	__u32	tx_carrier_errors;
-	__u32	tx_fifo_errors;
-	__u32	tx_heartbeat_errors;
-	__u32	tx_window_errors;
-
-	/* for cslip etc */
-	__u32	rx_compressed;
-	__u32	tx_compressed;
-};
-
-/* The struct should be in sync with struct ifmap */
-struct rtnl_link_ifmap
-{
-	__u64	mem_start;
-	__u64	mem_end;
-	__u64	base_addr;
-	__u16	irq;
-	__u8	dma;
-	__u8	port;
-};
-
-enum
-{
-	IFLA_UNSPEC,
-	IFLA_ADDRESS,
-	IFLA_BROADCAST,
-	IFLA_IFNAME,
-	IFLA_MTU,
-	IFLA_LINK,
-	IFLA_QDISC,
-	IFLA_STATS,
-	IFLA_COST,
-#define IFLA_COST IFLA_COST
-	IFLA_PRIORITY,
-#define IFLA_PRIORITY IFLA_PRIORITY
-	IFLA_MASTER,
-#define IFLA_MASTER IFLA_MASTER
-	IFLA_WIRELESS,		/* Wireless Extension event - see wireless.h */
-#define IFLA_WIRELESS IFLA_WIRELESS
-	IFLA_PROTINFO,		/* Protocol specific information for a link */
-#define IFLA_PROTINFO IFLA_PROTINFO
-	IFLA_TXQLEN,
-#define IFLA_TXQLEN IFLA_TXQLEN
-	IFLA_MAP,
-#define IFLA_MAP IFLA_MAP
-	IFLA_WEIGHT,
-#define IFLA_WEIGHT IFLA_WEIGHT
-	IFLA_OPERSTATE,
-	IFLA_LINKMODE,
-	__IFLA_MAX
-};
-
-
-#define IFLA_MAX (__IFLA_MAX - 1)
-
-/* ifi_flags.
-
-   IFF_* flags.
-
-   The only change is:
-   IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
-   more not changeable by user. They describe link media
-   characteristics and set by device driver.
-
-   Comments:
-   - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid
-   - If neither of these three flags are set;
-     the interface is NBMA.
-
-   - IFF_MULTICAST does not mean anything special:
-   multicasts can be used on all not-NBMA links.
-   IFF_MULTICAST means that this media uses special encapsulation
-   for multicast frames. Apparently, all IFF_POINTOPOINT and
-   IFF_BROADCAST devices are able to use multicasts too.
- */
-
-/* IFLA_LINK.
-   For usual devices it is equal ifi_index.
-   If it is a "virtual interface" (f.e. tunnel), ifi_link
-   can point to real physical interface (f.e. for bandwidth calculations),
-   or maybe 0, what means, that real media is unknown (usual
-   for IPIP tunnels, when route to endpoint is allowed to change)
- */
-
-/* Subtype attributes for IFLA_PROTINFO */
-enum
-{
-	IFLA_INET6_UNSPEC,
-	IFLA_INET6_FLAGS,	/* link flags			*/
-	IFLA_INET6_CONF,	/* sysctl parameters		*/
-	IFLA_INET6_STATS,	/* statistics			*/
-	IFLA_INET6_MCAST,	/* MC things. What of them?	*/
-	IFLA_INET6_CACHEINFO,	/* time values and max reasm size */
-	__IFLA_INET6_MAX
-};
-
-#define IFLA_INET6_MAX	(__IFLA_INET6_MAX - 1)
-
-struct ifla_cacheinfo
-{
-	__u32	max_reasm_len;
-	__u32	tstamp;		/* ipv6InterfaceTable updated timestamp */
-	__u32	reachable_time;
-	__u32	retrans_time;
-};
-
 #endif /* _LINUX_IF_H */
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
new file mode 100644
index 000000000000..e963a077e6f5
--- /dev/null
+++ b/include/linux/if_link.h
@@ -0,0 +1,136 @@
+#ifndef _LINUX_IF_LINK_H
+#define _LINUX_IF_LINK_H
+
+#include <linux/netlink.h>
+
+/* The struct should be in sync with struct net_device_stats */
+struct rtnl_link_stats
+{
+	__u32	rx_packets;		/* total packets received	*/
+	__u32	tx_packets;		/* total packets transmitted	*/
+	__u32	rx_bytes;		/* total bytes received 	*/
+	__u32	tx_bytes;		/* total bytes transmitted	*/
+	__u32	rx_errors;		/* bad packets received		*/
+	__u32	tx_errors;		/* packet transmit problems	*/
+	__u32	rx_dropped;		/* no space in linux buffers	*/
+	__u32	tx_dropped;		/* no space available in linux	*/
+	__u32	multicast;		/* multicast packets received	*/
+	__u32	collisions;
+
+	/* detailed rx_errors: */
+	__u32	rx_length_errors;
+	__u32	rx_over_errors;		/* receiver ring buff overflow	*/
+	__u32	rx_crc_errors;		/* recved pkt with crc error	*/
+	__u32	rx_frame_errors;	/* recv'd frame alignment error */
+	__u32	rx_fifo_errors;		/* recv'r fifo overrun		*/
+	__u32	rx_missed_errors;	/* receiver missed packet	*/
+
+	/* detailed tx_errors */
+	__u32	tx_aborted_errors;
+	__u32	tx_carrier_errors;
+	__u32	tx_fifo_errors;
+	__u32	tx_heartbeat_errors;
+	__u32	tx_window_errors;
+
+	/* for cslip etc */
+	__u32	rx_compressed;
+	__u32	tx_compressed;
+};
+
+/* The struct should be in sync with struct ifmap */
+struct rtnl_link_ifmap
+{
+	__u64	mem_start;
+	__u64	mem_end;
+	__u64	base_addr;
+	__u16	irq;
+	__u8	dma;
+	__u8	port;
+};
+
+enum
+{
+	IFLA_UNSPEC,
+	IFLA_ADDRESS,
+	IFLA_BROADCAST,
+	IFLA_IFNAME,
+	IFLA_MTU,
+	IFLA_LINK,
+	IFLA_QDISC,
+	IFLA_STATS,
+	IFLA_COST,
+#define IFLA_COST IFLA_COST
+	IFLA_PRIORITY,
+#define IFLA_PRIORITY IFLA_PRIORITY
+	IFLA_MASTER,
+#define IFLA_MASTER IFLA_MASTER
+	IFLA_WIRELESS,		/* Wireless Extension event - see wireless.h */
+#define IFLA_WIRELESS IFLA_WIRELESS
+	IFLA_PROTINFO,		/* Protocol specific information for a link */
+#define IFLA_PROTINFO IFLA_PROTINFO
+	IFLA_TXQLEN,
+#define IFLA_TXQLEN IFLA_TXQLEN
+	IFLA_MAP,
+#define IFLA_MAP IFLA_MAP
+	IFLA_WEIGHT,
+#define IFLA_WEIGHT IFLA_WEIGHT
+	IFLA_OPERSTATE,
+	IFLA_LINKMODE,
+	__IFLA_MAX
+};
+
+
+#define IFLA_MAX (__IFLA_MAX - 1)
+
+/* ifi_flags.
+
+   IFF_* flags.
+
+   The only change is:
+   IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
+   more not changeable by user. They describe link media
+   characteristics and set by device driver.
+
+   Comments:
+   - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid
+   - If neither of these three flags are set;
+     the interface is NBMA.
+
+   - IFF_MULTICAST does not mean anything special:
+   multicasts can be used on all not-NBMA links.
+   IFF_MULTICAST means that this media uses special encapsulation
+   for multicast frames. Apparently, all IFF_POINTOPOINT and
+   IFF_BROADCAST devices are able to use multicasts too.
+ */
+
+/* IFLA_LINK.
+   For usual devices it is equal ifi_index.
+   If it is a "virtual interface" (f.e. tunnel), ifi_link
+   can point to real physical interface (f.e. for bandwidth calculations),
+   or maybe 0, what means, that real media is unknown (usual
+   for IPIP tunnels, when route to endpoint is allowed to change)
+ */
+
+/* Subtype attributes for IFLA_PROTINFO */
+enum
+{
+	IFLA_INET6_UNSPEC,
+	IFLA_INET6_FLAGS,	/* link flags			*/
+	IFLA_INET6_CONF,	/* sysctl parameters		*/
+	IFLA_INET6_STATS,	/* statistics			*/
+	IFLA_INET6_MCAST,	/* MC things. What of them?	*/
+	IFLA_INET6_CACHEINFO,	/* time values and max reasm size */
+	__IFLA_INET6_MAX
+};
+
+#define IFLA_INET6_MAX	(__IFLA_INET6_MAX - 1)
+
+struct ifla_cacheinfo
+{
+	__u32	max_reasm_len;
+	__u32	tstamp;		/* ipv6InterfaceTable updated timestamp */
+	__u32	reachable_time;
+	__u32	retrans_time;
+};
+
+#endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 9c92dc8b9a08..3a18addaed4c 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -2,7 +2,7 @@
 #define __LINUX_RTNETLINK_H
 
 #include <linux/netlink.h>
-#include <linux/if.h>
+#include <linux/if_link.h>
 
 /****
  *		Routing/neighbour discovery messages.
-- 
cgit v1.2.3


From b854d0d218688b30ccea70521d6ea5f3f56d4e12 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 27 Sep 2006 22:43:05 -0700
Subject: [NET] KBUILD: Add missing entries for new net headers.

Add the following for userspace export by the 'headers_include'
make target: linux/fib_rules.h, linux/if_addr.h, linux/if_link.h,
linux/neighbour.h.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 1df2ac30a4d2..f7a52e19b4be 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -58,6 +58,7 @@ header-y += elf-em.h
 header-y += fadvise.h
 header-y += fd.h
 header-y += fdreg.h
+header-y += fib_rules.h
 header-y += ftape-header-segment.h
 header-y += ftape-vendors.h
 header-y += fuse.h
@@ -70,6 +71,7 @@ header-y += hysdn_if.h
 header-y += i2c-dev.h
 header-y += i8k.h
 header-y += icmp.h
+header-y += if_addr.h
 header-y += if_arcnet.h
 header-y += if_arp.h
 header-y += if_bonding.h
@@ -79,6 +81,7 @@ header-y += if_fddi.h
 header-y += if.h
 header-y += if_hippi.h
 header-y += if_infiniband.h
+header-y += if_link.h
 header-y += if_packet.h
 header-y += if_plip.h
 header-y += if_ppp.h
@@ -110,6 +113,7 @@ header-y += mmtimer.h
 header-y += mqueue.h
 header-y += mtio.h
 header-y += ncp_no.h
+header-y += neighbour.h
 header-y += netfilter_arp.h
 header-y += netrom.h
 header-y += nfs2.h
-- 
cgit v1.2.3


From d77072ecfb6d28287d5e2a61d60d87a3a444ac97 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 28 Sep 2006 14:20:34 -0700
Subject: [NET]: Annotate dst_ops protocol

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 include/net/dst.h         | 2 +-
 net/core/neighbour.c      | 2 +-
 net/ethernet/eth.c        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 13d6d4eb8b3a..9264139bd8df 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -187,7 +187,7 @@ struct hh_cache
 {
 	struct hh_cache *hh_next;	/* Next entry			     */
 	atomic_t	hh_refcnt;	/* number of users                   */
-	unsigned short  hh_type;	/* protocol identifier, f.e ETH_P_IP
+	__be16		hh_type;	/* protocol identifier, f.e ETH_P_IP
                                          *  NOTE:  For VLANs, this will be the
                                          *  encapuslated type. --BLG
                                          */
diff --git a/include/net/dst.h b/include/net/dst.h
index a8d825f90305..e156e38e4ac3 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -84,7 +84,7 @@ struct dst_entry
 struct dst_ops
 {
 	unsigned short		family;
-	unsigned short		protocol;
+	__be16			protocol;
 	unsigned		gc_thresh;
 
 	int			(*gc)(void);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index b6c69e1463e8..8ce8c471d868 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1079,7 +1079,7 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
 }
 
 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
-			  u16 protocol)
+			  __be16 protocol)
 {
 	struct hh_cache	*hh;
 	struct net_device *dev = dst->dev;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 43863933f27f..4bd78c8cfb26 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -223,7 +223,7 @@ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
  */
 int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
 {
-	unsigned short type = hh->hh_type;
+	__be16 type = hh->hh_type;
 	struct ethhdr *eth;
 	struct net_device *dev = neigh->dev;
 
-- 
cgit v1.2.3


From 59b8bfd8fd608821e5addc9f4682c7f2424afd8c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 28 Sep 2006 14:21:07 -0700
Subject: [NETFILTER]: netfilter misc annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_arp/arp_tables.h | 6 +++---
 include/linux/netfilter_ipv4/ip_queue.h  | 2 +-
 net/ipv4/netfilter.c                     | 4 ++--
 net/ipv4/netfilter/arp_tables.c          | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 149e87c9ab13..44e39b61d9e7 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -46,11 +46,11 @@ struct arpt_arp {
 	struct arpt_devaddr_info tgt_devaddr;
 
 	/* ARP operation code. */
-	u_int16_t arpop, arpop_mask;
+	__be16 arpop, arpop_mask;
 
 	/* ARP hardware address and protocol address format. */
-	u_int16_t arhrd, arhrd_mask;
-	u_int16_t arpro, arpro_mask;
+	__be16 arhrd, arhrd_mask;
+	__be16 arpro, arpro_mask;
 
 	/* The protocol address length is only accepted if it is 4
 	 * so there is no use in offering a way to do filtering on it.
diff --git a/include/linux/netfilter_ipv4/ip_queue.h b/include/linux/netfilter_ipv4/ip_queue.h
index aa08d68c4841..a03507f465f8 100644
--- a/include/linux/netfilter_ipv4/ip_queue.h
+++ b/include/linux/netfilter_ipv4/ip_queue.h
@@ -26,7 +26,7 @@ typedef struct ipq_packet_msg {
 	unsigned int hook;		/* Netfilter hook we rode in on */
 	char indev_name[IFNAMSIZ];	/* Name of incoming interface */
 	char outdev_name[IFNAMSIZ];	/* Name of outgoing interface */
-	unsigned short hw_protocol;	/* Hardware protocol (network order) */
+	__be16 hw_protocol;		/* Hardware protocol (network order) */
 	unsigned short hw_type;		/* Hardware type */
 	unsigned char hw_addrlen;	/* Hardware address length */
 	unsigned char hw_addr[8];	/* Hardware address */
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index f88347de21a9..5ac15379a0cf 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -128,8 +128,8 @@ EXPORT_SYMBOL(ip_nat_decode_session);
  */
 
 struct ip_rt_info {
-	u_int32_t daddr;
-	u_int32_t saddr;
+	__be32 daddr;
+	__be32 saddr;
 	u_int8_t tos;
 };
 
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 85f0d73ebfb4..17e1a687ab45 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -80,7 +80,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
 {
 	char *arpptr = (char *)(arphdr + 1);
 	char *src_devaddr, *tgt_devaddr;
-	u32 src_ipaddr, tgt_ipaddr;
+	__be32 src_ipaddr, tgt_ipaddr;
 	int i, ret;
 
 #define FWINV(bool,invflg) ((bool) ^ !!(arpinfo->invflags & invflg))
-- 
cgit v1.2.3


From cdcb71bf964e02e0a22007f5d90ead7bede3b85b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 28 Sep 2006 14:21:37 -0700
Subject: [NETFILTER]: conntrack annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ip_conntrack.h       |  2 +-
 include/linux/netfilter_ipv4/ip_conntrack_tuple.h | 16 ++---
 net/ipv4/netfilter/ip_conntrack_amanda.c          |  6 +-
 net/ipv4/netfilter/ip_conntrack_core.c            | 12 ++--
 net/ipv4/netfilter/ip_conntrack_ftp.c             |  6 +-
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c     |  4 +-
 net/ipv4/netfilter/ip_conntrack_irc.c             |  5 +-
 net/ipv4/netfilter/ip_conntrack_netbios_ns.c      | 10 +--
 net/ipv4/netfilter/ip_conntrack_netlink.c         | 82 +++++++++++------------
 net/ipv4/netfilter/ip_conntrack_proto_icmp.c      |  4 +-
 net/ipv4/netfilter/ip_conntrack_proto_sctp.c      |  2 +-
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c       |  6 +-
 net/ipv4/netfilter/ip_conntrack_sip.c             | 16 ++---
 net/ipv4/netfilter/ip_conntrack_tftp.c            |  8 +--
 14 files changed, 90 insertions(+), 89 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
index 51dbec1892c8..64e868034c4a 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -157,7 +157,7 @@ struct ip_conntrack_expect
 	unsigned int flags;
 
 #ifdef CONFIG_IP_NF_NAT_NEEDED
-	u_int32_t saved_ip;
+	__be32 saved_ip;
 	/* This is the original per-proto part, used to map the
 	 * expected connection the way the recipient expects. */
 	union ip_conntrack_manip_proto saved_proto;
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
index 2fdabdb4c0ef..c228bde74c33 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
@@ -23,13 +23,13 @@ union ip_conntrack_manip_proto
 		__be16 port;
 	} tcp;
 	struct {
-		u_int16_t port;
+		__be16 port;
 	} udp;
 	struct {
-		u_int16_t id;
+		__be16 id;
 	} icmp;
 	struct {
-		u_int16_t port;
+		__be16 port;
 	} sctp;
 	struct {
 		__be16 key;	/* key is 32bit, pptp only uses 16 */
@@ -39,7 +39,7 @@ union ip_conntrack_manip_proto
 /* The manipulable part of the tuple. */
 struct ip_conntrack_manip
 {
-	u_int32_t ip;
+	__be32 ip;
 	union ip_conntrack_manip_proto u;
 };
 
@@ -50,22 +50,22 @@ struct ip_conntrack_tuple
 
 	/* These are the parts of the tuple which are fixed. */
 	struct {
-		u_int32_t ip;
+		__be32 ip;
 		union {
 			/* Add other protocols here. */
 			u_int16_t all;
 
 			struct {
-				u_int16_t port;
+				__be16 port;
 			} tcp;
 			struct {
-				u_int16_t port;
+				__be16 port;
 			} udp;
 			struct {
 				u_int8_t type, code;
 			} icmp;
 			struct {
-				u_int16_t port;
+				__be16 port;
 			} sctp;
 			struct {
 				__be16 key;	/* key is 32bit, 
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index 0a7bd7f04061..6c7383a8e42b 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -155,11 +155,11 @@ static int help(struct sk_buff **pskb,
 		exp->tuple.dst.protonum = IPPROTO_TCP;
 		exp->tuple.dst.u.tcp.port = htons(port);
 
-		exp->mask.src.ip = 0xFFFFFFFF;
+		exp->mask.src.ip = htonl(0xFFFFFFFF);
 		exp->mask.src.u.tcp.port = 0;
-		exp->mask.dst.ip = 0xFFFFFFFF;
+		exp->mask.dst.ip = htonl(0xFFFFFFFF);
 		exp->mask.dst.protonum = 0xFF;
-		exp->mask.dst.u.tcp.port = 0xFFFF;
+		exp->mask.dst.u.tcp.port = htons(0xFFFF);
 
 		if (ip_nat_amanda_hook)
 			ret = ip_nat_amanda_hook(pskb, ctinfo, off - dataoff,
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index c432b3163609..143c4668538b 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -149,8 +149,8 @@ static unsigned int ip_conntrack_hash_rnd;
 static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
 			    unsigned int size, unsigned int rnd)
 {
-	return (jhash_3words(tuple->src.ip,
-	                     (tuple->dst.ip ^ tuple->dst.protonum),
+	return (jhash_3words((__force u32)tuple->src.ip,
+	                     ((__force u32)tuple->dst.ip ^ tuple->dst.protonum),
 	                     (tuple->src.u.all | (tuple->dst.u.all << 16)),
 	                     rnd) % size);
 }
@@ -1169,9 +1169,9 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct,
 int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
 			       const struct ip_conntrack_tuple *tuple)
 {
-	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
+	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(__be16),
 		&tuple->src.u.tcp.port);
-	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
+	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(__be16),
 		&tuple->dst.u.tcp.port);
 	return 0;
 
@@ -1186,9 +1186,9 @@ int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
 		return -EINVAL;
 
 	t->src.u.tcp.port =
-		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
+		*(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
 	t->dst.u.tcp.port =
-		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
+		*(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
 
 	return 0;
 }
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 1d18c863f064..93dcf960662f 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -425,8 +425,8 @@ static int help(struct sk_buff **pskb,
 	exp->tuple.src.u.tcp.port = 0; /* Don't care. */
 	exp->tuple.dst.protonum = IPPROTO_TCP;
 	exp->mask = ((struct ip_conntrack_tuple)
-		{ { 0xFFFFFFFF, { 0 } },
-		  { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
+		{ { htonl(0xFFFFFFFF), { 0 } },
+		  { htonl(0xFFFFFFFF), { .tcp = { htons(0xFFFF) } }, 0xFF }});
 
 	exp->expectfn = NULL;
 	exp->flags = 0;
@@ -488,7 +488,7 @@ static int __init ip_conntrack_ftp_init(void)
 	for (i = 0; i < ports_c; i++) {
 		ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
 		ftp[i].tuple.dst.protonum = IPPROTO_TCP;
-		ftp[i].mask.src.u.tcp.port = 0xFFFF;
+		ftp[i].mask.src.u.tcp.port = htons(0xFFFF);
 		ftp[i].mask.dst.protonum = 0xFF;
 		ftp[i].max_expected = 1;
 		ftp[i].timeout = 5 * 60; /* 5 minutes */
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index fb0aee691721..a2af5e0c7f99 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -242,10 +242,10 @@ exp_gre(struct ip_conntrack *ct,
 	exp_orig->tuple.dst.u.gre.key = callid;
 	exp_orig->tuple.dst.protonum = IPPROTO_GRE;
 
-	exp_orig->mask.src.ip = 0xffffffff;
+	exp_orig->mask.src.ip = htonl(0xffffffff);
 	exp_orig->mask.src.u.all = 0;
 	exp_orig->mask.dst.u.gre.key = htons(0xffff);
-	exp_orig->mask.dst.ip = 0xffffffff;
+	exp_orig->mask.dst.ip = htonl(0xffffffff);
 	exp_orig->mask.dst.protonum = 0xff;
 
 	exp_orig->master = ct;
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index 44889075f3b2..75f7c3db1619 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -218,7 +218,8 @@ static int help(struct sk_buff **pskb,
 				    IPPROTO_TCP }});
 			exp->mask = ((struct ip_conntrack_tuple)
 				{ { 0, { 0 } },
-				  { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
+				  { htonl(0xFFFFFFFF),
+					{ .tcp = { htons(0xFFFF) } }, 0xFF }});
 			exp->expectfn = NULL;
 			exp->flags = 0;
 			if (ip_nat_irc_hook)
@@ -266,7 +267,7 @@ static int __init ip_conntrack_irc_init(void)
 		hlpr = &irc_helpers[i];
 		hlpr->tuple.src.u.tcp.port = htons(ports[i]);
 		hlpr->tuple.dst.protonum = IPPROTO_TCP;
-		hlpr->mask.src.u.tcp.port = 0xFFFF;
+		hlpr->mask.src.u.tcp.port = htons(0xFFFF);
 		hlpr->mask.dst.protonum = 0xFF;
 		hlpr->max_expected = max_dcc_channels;
 		hlpr->timeout = dcc_timeout;
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index 4adec47aae32..a1d6a89f64aa 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -78,12 +78,12 @@ static int help(struct sk_buff **pskb,
 		goto out;
 
 	exp->tuple                = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-	exp->tuple.src.u.udp.port = ntohs(NMBD_PORT);
+	exp->tuple.src.u.udp.port = htons(NMBD_PORT);
 
 	exp->mask.src.ip          = mask;
-	exp->mask.src.u.udp.port  = 0xFFFF;
-	exp->mask.dst.ip          = 0xFFFFFFFF;
-	exp->mask.dst.u.udp.port  = 0xFFFF;
+	exp->mask.src.u.udp.port  = htons(0xFFFF);
+	exp->mask.dst.ip          = htonl(0xFFFFFFFF);
+	exp->mask.dst.u.udp.port  = htons(0xFFFF);
 	exp->mask.dst.protonum    = 0xFF;
 
 	exp->expectfn             = NULL;
@@ -115,7 +115,7 @@ static struct ip_conntrack_helper helper = {
 		.src = {
 			.u = {
 				.udp = {
-					.port	= 0xFFFF,
+					.port	= __constant_htons(0xFFFF),
 				}
 			}
 		},
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 52eddea27e93..53b6dffea6c2 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -78,8 +78,8 @@ ctnetlink_dump_tuples_ip(struct sk_buff *skb,
 {
 	struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
 	
-	NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip);
-	NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.ip);
+	NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(__be32), &tuple->src.ip);
+	NFA_PUT(skb, CTA_IP_V4_DST, sizeof(__be32), &tuple->dst.ip);
 
 	NFA_NEST_END(skb, nest_parms);
 
@@ -110,7 +110,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
 static inline int
 ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct)
 {
-	u_int32_t status = htonl((u_int32_t) ct->status);
+	__be32 status = htonl((u_int32_t) ct->status);
 	NFA_PUT(skb, CTA_STATUS, sizeof(status), &status);
 	return 0;
 
@@ -122,7 +122,7 @@ static inline int
 ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct)
 {
 	long timeout_l = ct->timeout.expires - jiffies;
-	u_int32_t timeout;
+	__be32 timeout;
 
 	if (timeout_l < 0)
 		timeout = 0;
@@ -192,13 +192,13 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
 {
 	enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
 	struct nfattr *nest_count = NFA_NEST(skb, type);
-	u_int32_t tmp;
+	__be32 tmp;
 
 	tmp = htonl(ct->counters[dir].packets);
-	NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp);
+	NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(__be32), &tmp);
 
 	tmp = htonl(ct->counters[dir].bytes);
-	NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp);
+	NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(__be32), &tmp);
 
 	NFA_NEST_END(skb, nest_count);
 
@@ -215,9 +215,9 @@ nfattr_failure:
 static inline int
 ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct)
 {
-	u_int32_t mark = htonl(ct->mark);
+	__be32 mark = htonl(ct->mark);
 
-	NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark);
+	NFA_PUT(skb, CTA_MARK, sizeof(__be32), &mark);
 	return 0;
 
 nfattr_failure:
@@ -230,8 +230,8 @@ nfattr_failure:
 static inline int
 ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct)
 {
-	u_int32_t id = htonl(ct->id);
-	NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id);
+	__be32 id = htonl(ct->id);
+	NFA_PUT(skb, CTA_ID, sizeof(__be32), &id);
 	return 0;
 
 nfattr_failure:
@@ -241,9 +241,9 @@ nfattr_failure:
 static inline int
 ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
 {
-	u_int32_t use = htonl(atomic_read(&ct->ct_general.use));
+	__be32 use = htonl(atomic_read(&ct->ct_general.use));
 	
-	NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
+	NFA_PUT(skb, CTA_USE, sizeof(__be32), &use);
 	return 0;
 
 nfattr_failure:
@@ -457,8 +457,8 @@ out:
 }
 
 static const size_t cta_min_ip[CTA_IP_MAX] = {
-	[CTA_IP_V4_SRC-1]	= sizeof(u_int32_t),
-	[CTA_IP_V4_DST-1]	= sizeof(u_int32_t),
+	[CTA_IP_V4_SRC-1]	= sizeof(__be32),
+	[CTA_IP_V4_DST-1]	= sizeof(__be32),
 };
 
 static inline int
@@ -475,11 +475,11 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
 
 	if (!tb[CTA_IP_V4_SRC-1])
 		return -EINVAL;
-	tuple->src.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
+	tuple->src.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
 
 	if (!tb[CTA_IP_V4_DST-1])
 		return -EINVAL;
-	tuple->dst.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
+	tuple->dst.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
 
 	DEBUGP("leaving\n");
 
@@ -602,8 +602,8 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr,
 }
 
 static const size_t cta_min_nat[CTA_NAT_MAX] = {
-	[CTA_NAT_MINIP-1]       = sizeof(u_int32_t),
-	[CTA_NAT_MAXIP-1]       = sizeof(u_int32_t),
+	[CTA_NAT_MINIP-1]       = sizeof(__be32),
+	[CTA_NAT_MAXIP-1]       = sizeof(__be32),
 };
 
 static inline int
@@ -623,12 +623,12 @@ ctnetlink_parse_nat(struct nfattr *nat,
 		return -EINVAL;
 
 	if (tb[CTA_NAT_MINIP-1])
-		range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
+		range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
 
 	if (!tb[CTA_NAT_MAXIP-1])
 		range->max_ip = range->min_ip;
 	else
-		range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
+		range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
 
 	if (range->min_ip)
 		range->flags |= IP_NAT_RANGE_MAP_IPS;
@@ -663,11 +663,11 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
 }
 
 static const size_t cta_min[CTA_MAX] = {
-	[CTA_STATUS-1] 		= sizeof(u_int32_t),
-	[CTA_TIMEOUT-1] 	= sizeof(u_int32_t),
-	[CTA_MARK-1]		= sizeof(u_int32_t),
-	[CTA_USE-1]		= sizeof(u_int32_t),
-	[CTA_ID-1]		= sizeof(u_int32_t)
+	[CTA_STATUS-1] 		= sizeof(__be32),
+	[CTA_TIMEOUT-1] 	= sizeof(__be32),
+	[CTA_MARK-1]		= sizeof(__be32),
+	[CTA_USE-1]		= sizeof(__be32),
+	[CTA_ID-1]		= sizeof(__be32)
 };
 
 static int
@@ -706,7 +706,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	ct = tuplehash_to_ctrack(h);
 	
 	if (cda[CTA_ID-1]) {
-		u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1]));
+		u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1]));
 		if (ct->id != id) {
 			ip_conntrack_put(ct);
 			return -ENOENT;
@@ -808,7 +808,7 @@ static inline int
 ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
 {
 	unsigned long d;
-	unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]));
+	unsigned status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1]));
 	d = ct->status ^ status;
 
 	if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
@@ -903,7 +903,7 @@ ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[])
 static inline int
 ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[])
 {
-	u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
+	u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
 	
 	if (!del_timer(&ct->timeout))
 		return -ETIME;
@@ -966,7 +966,7 @@ ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
 
 #if defined(CONFIG_IP_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK-1])
-		ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+		ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
 #endif
 
 	DEBUGP("all done\n");
@@ -989,7 +989,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
 
 	if (!cda[CTA_TIMEOUT-1])
 		goto err;
-	ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
+	ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
 
 	ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
 	ct->status |= IPS_CONFIRMED;
@@ -1006,7 +1006,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[],
 
 #if defined(CONFIG_IP_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK-1])
-		ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+		ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
 #endif
 
 	ct->helper = ip_conntrack_helper_find_get(rtuple);
@@ -1138,8 +1138,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
                           const struct ip_conntrack_expect *exp)
 {
 	struct ip_conntrack *master = exp->master;
-	u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ);
-	u_int32_t id = htonl(exp->id);
+	__be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ);
+	__be32 id = htonl(exp->id);
 
 	if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
 		goto nfattr_failure;
@@ -1150,8 +1150,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
 				 CTA_EXPECT_MASTER) < 0)
 		goto nfattr_failure;
 	
-	NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout);
-	NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id);
+	NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(__be32), &timeout);
+	NFA_PUT(skb, CTA_EXPECT_ID, sizeof(__be32), &id);
 
 	return 0;
 	
@@ -1272,8 +1272,8 @@ out:
 }
 
 static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
-	[CTA_EXPECT_TIMEOUT-1]          = sizeof(u_int32_t),
-	[CTA_EXPECT_ID-1]               = sizeof(u_int32_t)
+	[CTA_EXPECT_TIMEOUT-1]          = sizeof(__be32),
+	[CTA_EXPECT_ID-1]               = sizeof(__be32)
 };
 
 static int
@@ -1321,7 +1321,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 		return -ENOENT;
 
 	if (cda[CTA_EXPECT_ID-1]) {
-		u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
+		__be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
 		if (exp->id != ntohl(id)) {
 			ip_conntrack_expect_put(exp);
 			return -ENOENT;
@@ -1375,8 +1375,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 			return -ENOENT;
 
 		if (cda[CTA_EXPECT_ID-1]) {
-			u_int32_t id = 
-				*(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
+			__be32 id =
+				*(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
 			if (exp->id != ntohl(id)) {
 				ip_conntrack_expect_put(exp);
 				return -ENOENT;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 09c40ebe3345..295b6fa340db 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -261,7 +261,7 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
 static int icmp_tuple_to_nfattr(struct sk_buff *skb,
 				const struct ip_conntrack_tuple *t)
 {
-	NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t),
+	NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(__be16),
 		&t->src.u.icmp.id);
 	NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
 		&t->dst.u.icmp.type);
@@ -287,7 +287,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[],
 	tuple->dst.u.icmp.code =
 			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
 	tuple->src.u.icmp.id =
-			*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
+			*(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
 
 	if (tuple->dst.u.icmp.type >= sizeof(invmap)
 	    || !invmap[tuple->dst.u.icmp.type])
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index b908a4842e18..2443322e4128 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -210,7 +210,7 @@ static int sctp_print_conntrack(struct seq_file *s,
 for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0;	\
 	offset < skb->len &&						\
 	(sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch));	\
-	offset += (htons(sch->length) + 3) & ~3, count++)
+	offset += (ntohs(sch->length) + 3) & ~3, count++)
 
 /* Some validity checks to make sure the chunks are fine */
 static int do_basic_checks(struct ip_conntrack *conntrack,
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 03ae9a04cb37..06e4e8a6dd9f 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -519,8 +519,8 @@ static void tcp_sack(const struct sk_buff *skb,
 
 	/* Fast path for timestamp-only option */
 	if (length == TCPOLEN_TSTAMP_ALIGNED*4
-	    && *(__u32 *)ptr ==
-	        __constant_ntohl((TCPOPT_NOP << 24) 
+	    && *(__be32 *)ptr ==
+	        __constant_htonl((TCPOPT_NOP << 24)
 	        		 | (TCPOPT_NOP << 16)
 	        		 | (TCPOPT_TIMESTAMP << 8)
 	        		 | TCPOLEN_TIMESTAMP))
@@ -551,7 +551,7 @@ static void tcp_sack(const struct sk_buff *skb,
 			    	for (i = 0;
 			    	     i < (opsize - TCPOLEN_SACK_BASE);
 			    	     i += TCPOLEN_SACK_PERBLOCK) {
-					tmp = ntohl(*((u_int32_t *)(ptr+i)+1));
+					tmp = ntohl(*((__be32 *)(ptr+i)+1));
 					
 					if (after(tmp, *sack))
 						*sack = tmp;
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
index 2893e9c74850..f4f75995a9e4 100644
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ b/net/ipv4/netfilter/ip_conntrack_sip.c
@@ -193,7 +193,7 @@ static int skp_digits_len(const char *dptr, const char *limit, int *shift)
 
 /* Simple ipaddr parser.. */
 static int parse_ipaddr(const char *cp,	const char **endp,
-			u_int32_t *ipaddr, const char *limit)
+			__be32 *ipaddr, const char *limit)
 {
 	unsigned long int val;
 	int i, digit = 0;
@@ -227,7 +227,7 @@ static int parse_ipaddr(const char *cp,	const char **endp,
 static int epaddr_len(const char *dptr, const char *limit, int *shift)
 {
 	const char *aux = dptr;
-	u_int32_t ip;
+	__be32 ip;
 
 	if (parse_ipaddr(dptr, &dptr, &ip, limit) < 0) {
 		DEBUGP("ip: %s parse failed.!\n", dptr);
@@ -302,7 +302,7 @@ int ct_sip_get_info(const char *dptr, size_t dlen,
 static int set_expected_rtp(struct sk_buff **pskb,
 			    struct ip_conntrack *ct,
 			    enum ip_conntrack_info ctinfo,
-			    u_int32_t ipaddr, u_int16_t port,
+			    __be32 ipaddr, u_int16_t port,
 			    const char *dptr)
 {
 	struct ip_conntrack_expect *exp;
@@ -319,10 +319,10 @@ static int set_expected_rtp(struct sk_buff **pskb,
 	exp->tuple.dst.u.udp.port = htons(port);
 	exp->tuple.dst.protonum = IPPROTO_UDP;
 
-	exp->mask.src.ip = 0xFFFFFFFF;
+	exp->mask.src.ip = htonl(0xFFFFFFFF);
 	exp->mask.src.u.udp.port = 0;
-	exp->mask.dst.ip = 0xFFFFFFFF;
-	exp->mask.dst.u.udp.port = 0xFFFF;
+	exp->mask.dst.ip = htonl(0xFFFFFFFF);
+	exp->mask.dst.u.udp.port = htons(0xFFFF);
 	exp->mask.dst.protonum = 0xFF;
 
 	exp->expectfn = NULL;
@@ -349,7 +349,7 @@ static int sip_help(struct sk_buff **pskb,
 	const char *dptr;
 	int ret = NF_ACCEPT;
 	int matchoff, matchlen;
-	u_int32_t ipaddr;
+	__be32 ipaddr;
 	u_int16_t port;
 
 	/* No Data ? */
@@ -439,7 +439,7 @@ static int __init init(void)
 
 		sip[i].tuple.dst.protonum = IPPROTO_UDP;
 		sip[i].tuple.src.u.udp.port = htons(ports[i]);
-		sip[i].mask.src.u.udp.port = 0xFFFF;
+		sip[i].mask.src.u.udp.port = htons(0xFFFF);
 		sip[i].mask.dst.protonum = 0xFF;
 		sip[i].max_expected = 2;
 		sip[i].timeout = 3 * 60; /* 3 minutes */
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
index 7e33d3bed5e3..fe0b634dd377 100644
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_tftp.c
@@ -70,10 +70,10 @@ static int tftp_help(struct sk_buff **pskb,
 			return NF_DROP;
 
 		exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-		exp->mask.src.ip = 0xffffffff;
+		exp->mask.src.ip = htonl(0xffffffff);
 		exp->mask.src.u.udp.port = 0;
-		exp->mask.dst.ip = 0xffffffff;
-		exp->mask.dst.u.udp.port = 0xffff;
+		exp->mask.dst.ip = htonl(0xffffffff);
+		exp->mask.dst.u.udp.port = htons(0xffff);
 		exp->mask.dst.protonum = 0xff;
 		exp->expectfn = NULL;
 		exp->flags = 0;
@@ -129,7 +129,7 @@ static int __init ip_conntrack_tftp_init(void)
 		tftp[i].tuple.dst.protonum = IPPROTO_UDP;
 		tftp[i].tuple.src.u.udp.port = htons(ports[i]);
 		tftp[i].mask.dst.protonum = 0xFF;
-		tftp[i].mask.src.u.udp.port = 0xFFFF;
+		tftp[i].mask.src.u.udp.port = htons(0xFFFF);
 		tftp[i].max_expected = 1;
 		tftp[i].timeout = 5 * 60; /* 5 minutes */
 		tftp[i].me = THIS_MODULE;
-- 
cgit v1.2.3


From a76b11dd25957287af12ce6855be6d7fd415b3a9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 28 Sep 2006 14:22:02 -0700
Subject: [NETFILTER]: NAT annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ip_nat.h   |  2 +-
 net/ipv4/netfilter/ip_nat_core.c        | 14 ++++++-------
 net/ipv4/netfilter/ip_nat_ftp.c         | 10 ++++-----
 net/ipv4/netfilter/ip_nat_helper.c      | 37 +++++++++++++++------------------
 net/ipv4/netfilter/ip_nat_helper_pptp.c |  2 +-
 net/ipv4/netfilter/ip_nat_proto_icmp.c  |  2 +-
 net/ipv4/netfilter/ip_nat_proto_tcp.c   | 10 ++++-----
 net/ipv4/netfilter/ip_nat_proto_udp.c   | 10 ++++-----
 net/ipv4/netfilter/ip_nat_rule.c        |  6 +++---
 net/ipv4/netfilter/ip_nat_sip.c         |  8 +++----
 net/ipv4/netfilter/ip_nat_snmp_basic.c  |  2 +-
 net/ipv4/netfilter/ip_nat_standalone.c  |  2 +-
 12 files changed, 51 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h
index 98f8407e4cb5..bdf553620ca1 100644
--- a/include/linux/netfilter_ipv4/ip_nat.h
+++ b/include/linux/netfilter_ipv4/ip_nat.h
@@ -33,7 +33,7 @@ struct ip_nat_range
 	unsigned int flags;
 
 	/* Inclusive: network order. */
-	u_int32_t min_ip, max_ip;
+	__be32 min_ip, max_ip;
 
 	/* Inclusive: network order */
 	union ip_conntrack_manip_proto min, max;
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 71f3e09cbc84..4b6260a97408 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -82,7 +82,7 @@ static inline unsigned int
 hash_by_src(const struct ip_conntrack_tuple *tuple)
 {
 	/* Original src, to ensure we map it consistently if poss. */
-	return jhash_3words(tuple->src.ip, tuple->src.u.all,
+	return jhash_3words((__force u32)tuple->src.ip, tuple->src.u.all,
 			    tuple->dst.protonum, 0) % ip_nat_htable_size;
 }
 
@@ -190,7 +190,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
 		    const struct ip_conntrack *conntrack,
 		    enum ip_nat_manip_type maniptype)
 {
-	u_int32_t *var_ipp;
+	__be32 *var_ipp;
 	/* Host order */
 	u_int32_t minip, maxip, j;
 
@@ -217,7 +217,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
 	 * like this), even across reboots. */
 	minip = ntohl(range->min_ip);
 	maxip = ntohl(range->max_ip);
-	j = jhash_2words(tuple->src.ip, tuple->dst.ip, 0);
+	j = jhash_2words((__force u32)tuple->src.ip, (__force u32)tuple->dst.ip, 0);
 	*var_ipp = htonl(minip + j % (maxip - minip + 1));
 }
 
@@ -534,9 +534,9 @@ int
 ip_nat_port_range_to_nfattr(struct sk_buff *skb, 
 			    const struct ip_nat_range *range)
 {
-	NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t),
+	NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
 		&range->min.tcp.port);
-	NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t),
+	NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
 		&range->max.tcp.port);
 
 	return 0;
@@ -555,7 +555,7 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
 	if (tb[CTA_PROTONAT_PORT_MIN-1]) {
 		ret = 1;
 		range->min.tcp.port = 
-			*(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
+			*(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
 	}
 	
 	if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
@@ -564,7 +564,7 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
 	} else {
 		ret = 1;
 		range->max.tcp.port = 
-			*(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
+			*(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
 	}
 
 	return ret;
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index 3328fc5c5f50..a71c233d8112 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -34,7 +34,7 @@ MODULE_DESCRIPTION("ftp NAT helper");
 
 static int
 mangle_rfc959_packet(struct sk_buff **pskb,
-		     u_int32_t newip,
+		     __be32 newip,
 		     u_int16_t port,
 		     unsigned int matchoff,
 		     unsigned int matchlen,
@@ -57,7 +57,7 @@ mangle_rfc959_packet(struct sk_buff **pskb,
 /* |1|132.235.1.2|6275| */
 static int
 mangle_eprt_packet(struct sk_buff **pskb,
-		   u_int32_t newip,
+		   __be32 newip,
 		   u_int16_t port,
 		   unsigned int matchoff,
 		   unsigned int matchlen,
@@ -79,7 +79,7 @@ mangle_eprt_packet(struct sk_buff **pskb,
 /* |1|132.235.1.2|6275| */
 static int
 mangle_epsv_packet(struct sk_buff **pskb,
-		   u_int32_t newip,
+		   __be32 newip,
 		   u_int16_t port,
 		   unsigned int matchoff,
 		   unsigned int matchlen,
@@ -98,7 +98,7 @@ mangle_epsv_packet(struct sk_buff **pskb,
 					matchlen, buffer, strlen(buffer));
 }
 
-static int (*mangle[])(struct sk_buff **, u_int32_t, u_int16_t,
+static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
 		     unsigned int,
 		     unsigned int,
 		     struct ip_conntrack *,
@@ -120,7 +120,7 @@ static unsigned int ip_nat_ftp(struct sk_buff **pskb,
 			       struct ip_conntrack_expect *exp,
 			       u32 *seq)
 {
-	u_int32_t newip;
+	__be32 newip;
 	u_int16_t port;
 	int dir = CTINFO2DIR(ctinfo);
 	struct ip_conntrack *ct = exp->master;
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index e9c5187ea5b2..3bf858480558 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -189,7 +189,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
 					   		datalen, 0));
 	} else
 		tcph->check = nf_proto_csum_update(*pskb,
-						   htons(oldlen) ^ 0xFFFF,
+						   htons(oldlen) ^ htons(0xFFFF),
 						   htons(datalen),
 						   tcph->check, 1);
 
@@ -267,7 +267,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
 			udph->check = -1;
 	} else
 		udph->check = nf_proto_csum_update(*pskb,
-						   htons(oldlen) ^ 0xFFFF,
+						   htons(oldlen) ^ htons(0xFFFF),
 						   htons(datalen),
 						   udph->check, 1);
 	return 1;
@@ -284,26 +284,24 @@ sack_adjust(struct sk_buff *skb,
 {
 	while (sackoff < sackend) {
 		struct tcp_sack_block_wire *sack;
-		u_int32_t new_start_seq, new_end_seq;
+		__be32 new_start_seq, new_end_seq;
 
 		sack = (void *)skb->data + sackoff;
 		if (after(ntohl(sack->start_seq) - natseq->offset_before,
 			  natseq->correction_pos))
-			new_start_seq = ntohl(sack->start_seq) 
-					- natseq->offset_after;
+			new_start_seq = htonl(ntohl(sack->start_seq)
+					- natseq->offset_after);
 		else
-			new_start_seq = ntohl(sack->start_seq) 
-					- natseq->offset_before;
-		new_start_seq = htonl(new_start_seq);
+			new_start_seq = htonl(ntohl(sack->start_seq)
+					- natseq->offset_before);
 
 		if (after(ntohl(sack->end_seq) - natseq->offset_before,
 			  natseq->correction_pos))
-			new_end_seq = ntohl(sack->end_seq)
-				      - natseq->offset_after;
+			new_end_seq = htonl(ntohl(sack->end_seq)
+				      - natseq->offset_after);
 		else
-			new_end_seq = ntohl(sack->end_seq)
-				      - natseq->offset_before;
-		new_end_seq = htonl(new_end_seq);
+			new_end_seq = htonl(ntohl(sack->end_seq)
+				      - natseq->offset_before);
 
 		DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
 			ntohl(sack->start_seq), new_start_seq,
@@ -375,7 +373,8 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
 		  enum ip_conntrack_info ctinfo)
 {
 	struct tcphdr *tcph;
-	int dir, newseq, newack;
+	int dir;
+	__be32 newseq, newack;
 	struct ip_nat_seq *this_way, *other_way;	
 
 	dir = CTINFO2DIR(ctinfo);
@@ -388,17 +387,15 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
 
 	tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
 	if (after(ntohl(tcph->seq), this_way->correction_pos))
-		newseq = ntohl(tcph->seq) + this_way->offset_after;
+		newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
 	else
-		newseq = ntohl(tcph->seq) + this_way->offset_before;
-	newseq = htonl(newseq);
+		newseq = htonl(ntohl(tcph->seq) + this_way->offset_before);
 
 	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
 		  other_way->correction_pos))
-		newack = ntohl(tcph->ack_seq) - other_way->offset_after;
+		newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after);
 	else
-		newack = ntohl(tcph->ack_seq) - other_way->offset_before;
-	newack = htonl(newack);
+		newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
 
 	tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq,
 					   tcph->check, 0);
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
index 2ff578807123..329fdcd7d702 100644
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -51,7 +51,7 @@
 
 #define IP_NAT_PPTP_VERSION "3.0"
 
-#define REQ_CID(req, off)		(*(u_int16_t *)((char *)(req) + (off)))
+#define REQ_CID(req, off)		(*(__be16 *)((char *)(req) + (off)))
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index ec50cc295317..3f6efc13ac74 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -67,7 +67,7 @@ icmp_manip_pkt(struct sk_buff **pskb,
 
 	hdr = (struct icmphdr *)((*pskb)->data + hdroff);
 	hdr->checksum = nf_proto_csum_update(*pskb,
-					     hdr->un.echo.id ^ 0xFFFF,
+					     hdr->un.echo.id ^ htons(0xFFFF),
 					     tuple->src.u.icmp.id,
 					     hdr->checksum, 0);
 	hdr->un.echo.id = tuple->src.u.icmp.id;
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index 72a6307bd2db..12deb13b93b1 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -24,7 +24,7 @@ tcp_in_range(const struct ip_conntrack_tuple *tuple,
 	     const union ip_conntrack_manip_proto *min,
 	     const union ip_conntrack_manip_proto *max)
 {
-	u_int16_t port;
+	__be16 port;
 
 	if (maniptype == IP_NAT_MANIP_SRC)
 		port = tuple->src.u.tcp.port;
@@ -42,7 +42,7 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
 		 const struct ip_conntrack *conntrack)
 {
 	static u_int16_t port;
-	u_int16_t *portptr;
+	__be16 *portptr;
 	unsigned int range_size, min, i;
 
 	if (maniptype == IP_NAT_MANIP_SRC)
@@ -93,8 +93,8 @@ tcp_manip_pkt(struct sk_buff **pskb,
 	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
 	struct tcphdr *hdr;
 	unsigned int hdroff = iphdroff + iph->ihl*4;
-	u32 oldip, newip;
-	u16 *portptr, newport, oldport;
+	__be32 oldip, newip;
+	__be16 *portptr, newport, oldport;
 	int hdrsize = 8; /* TCP connection tracking guarantees this much */
 
 	/* this could be a inner header returned in icmp packet; in such
@@ -130,7 +130,7 @@ tcp_manip_pkt(struct sk_buff **pskb,
 		return 1;
 
 	hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1);
-	hdr->check = nf_proto_csum_update(*pskb, oldport ^ 0xFFFF, newport,
+	hdr->check = nf_proto_csum_update(*pskb, oldport ^ htons(0xFFFF), newport,
 					  hdr->check, 0);
 	return 1;
 }
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index 5da196ae758c..4bbec7730d18 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -24,7 +24,7 @@ udp_in_range(const struct ip_conntrack_tuple *tuple,
 	     const union ip_conntrack_manip_proto *min,
 	     const union ip_conntrack_manip_proto *max)
 {
-	u_int16_t port;
+	__be16 port;
 
 	if (maniptype == IP_NAT_MANIP_SRC)
 		port = tuple->src.u.udp.port;
@@ -42,7 +42,7 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple,
 		 const struct ip_conntrack *conntrack)
 {
 	static u_int16_t port;
-	u_int16_t *portptr;
+	__be16 *portptr;
 	unsigned int range_size, min, i;
 
 	if (maniptype == IP_NAT_MANIP_SRC)
@@ -91,8 +91,8 @@ udp_manip_pkt(struct sk_buff **pskb,
 	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
 	struct udphdr *hdr;
 	unsigned int hdroff = iphdroff + iph->ihl*4;
-	u32 oldip, newip;
-	u16 *portptr, newport;
+	__be32 oldip, newip;
+	__be16 *portptr, newport;
 
 	if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
 		return 0;
@@ -118,7 +118,7 @@ udp_manip_pkt(struct sk_buff **pskb,
 		hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip,
 						  hdr->check, 1);
 		hdr->check = nf_proto_csum_update(*pskb,
-						  *portptr ^ 0xFFFF, newport,
+						  *portptr ^ htons(0xFFFF), newport,
 						  hdr->check, 0);
 		if (!hdr->check)
 			hdr->check = -1;
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 7b703839aa58..a176aa3031e0 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -119,7 +119,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb,
 }
 
 /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
-static void warn_if_extra_mangle(u32 dstip, u32 srcip)
+static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
 {
 	static int warned = 0;
 	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
@@ -205,7 +205,7 @@ alloc_null_binding(struct ip_conntrack *conntrack,
 	   per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
 	   Use reply in case it's already been mangled (eg local packet).
 	*/
-	u_int32_t ip
+	__be32 ip
 		= (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
 		   ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
 		   : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
@@ -222,7 +222,7 @@ alloc_null_binding_confirmed(struct ip_conntrack *conntrack,
                              struct ip_nat_info *info,
                              unsigned int hooknum)
 {
-	u_int32_t ip
+	__be32 ip
 		= (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
 		   ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
 		   : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c
index 6ffba63adca2..71fc2730a007 100644
--- a/net/ipv4/netfilter/ip_nat_sip.c
+++ b/net/ipv4/netfilter/ip_nat_sip.c
@@ -60,8 +60,8 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb,
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
 	unsigned int bufflen, dataoff;
-	u_int32_t ip;
-	u_int16_t port;
+	__be32 ip;
+	__be16 port;
 
 	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
 
@@ -159,7 +159,7 @@ static int mangle_content_len(struct sk_buff **pskb,
 static unsigned int mangle_sdp(struct sk_buff **pskb,
 			       enum ip_conntrack_info ctinfo,
 			       struct ip_conntrack *ct,
-			       u_int32_t newip, u_int16_t port,
+			       __be32 newip, u_int16_t port,
 			       const char *dptr)
 {
 	char buffer[sizeof("nnn.nnn.nnn.nnn")];
@@ -195,7 +195,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb,
 {
 	struct ip_conntrack *ct = exp->master;
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	u_int32_t newip;
+	__be32 newip;
 	u_int16_t port;
 
 	DEBUGP("ip_nat_sdp():\n");
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 18b7fbdccb61..168f45fa1898 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -1211,7 +1211,7 @@ static int snmp_translate(struct ip_conntrack *ct,
                           struct sk_buff **pskb)
 {
 	struct iphdr *iph = (*pskb)->nh.iph;
-	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
+	struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
 	u_int16_t udplen = ntohs(udph->len);
 	u_int16_t paylen = udplen - sizeof(struct udphdr);
 	int dir = CTINFO2DIR(ctinfo);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 9c577db62047..021395b67463 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -191,7 +191,7 @@ ip_nat_in(unsigned int hooknum,
           int (*okfn)(struct sk_buff *))
 {
 	unsigned int ret;
-	u_int32_t daddr = (*pskb)->nh.iph->daddr;
+	__be32 daddr = (*pskb)->nh.iph->daddr;
 
 	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN
-- 
cgit v1.2.3


From 6a19d61472d0802a24493c0d200e88f99ad39cd8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 28 Sep 2006 14:22:24 -0700
Subject: [NETFILTER]: ipt annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ipt_iprange.h |  2 +-
 net/ipv4/netfilter/ipt_CLUSTERIP.c         | 14 +++++++-------
 net/ipv4/netfilter/ipt_ECN.c               | 12 ++++++------
 net/ipv4/netfilter/ipt_NETMAP.c            |  2 +-
 net/ipv4/netfilter/ipt_REJECT.c            |  4 ++--
 net/ipv4/netfilter/ipt_SAME.c              |  3 ++-
 net/ipv4/netfilter/ipt_TCPMSS.c            | 17 +++++++++--------
 net/ipv4/netfilter/ipt_TOS.c               |  4 ++--
 net/ipv4/netfilter/ipt_TTL.c               |  4 ++--
 net/ipv4/netfilter/ipt_hashlimit.c         | 16 +++++++++-------
 net/ipv4/netfilter/ipt_recent.c            | 15 +++++++--------
 net/ipv4/netfilter/iptable_mangle.c        |  2 +-
 12 files changed, 49 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4/ipt_iprange.h b/include/linux/netfilter_ipv4/ipt_iprange.h
index 3ecb3bd63676..34ab0fb736e2 100644
--- a/include/linux/netfilter_ipv4/ipt_iprange.h
+++ b/include/linux/netfilter_ipv4/ipt_iprange.h
@@ -8,7 +8,7 @@
 
 struct ipt_iprange {
 	/* Inclusive: network order. */
-	u_int32_t min_ip, max_ip;
+	__be32 min_ip, max_ip;
 };
 
 struct ipt_iprange_info
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 41589665fc5d..7a29d6e7baa7 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -52,7 +52,7 @@ struct clusterip_config {
 	atomic_t entries;			/* number of entries/rules
 						 * referencing us */
 
-	u_int32_t clusterip;			/* the IP address */
+	__be32 clusterip;			/* the IP address */
 	u_int8_t clustermac[ETH_ALEN];		/* the MAC address */
 	struct net_device *dev;			/* device */
 	u_int16_t num_total_nodes;		/* total number of nodes */
@@ -119,7 +119,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
 }
 
 static struct clusterip_config *
-__clusterip_config_find(u_int32_t clusterip)
+__clusterip_config_find(__be32 clusterip)
 {
 	struct list_head *pos;
 
@@ -136,7 +136,7 @@ __clusterip_config_find(u_int32_t clusterip)
 }
 
 static inline struct clusterip_config *
-clusterip_config_find_get(u_int32_t clusterip, int entry)
+clusterip_config_find_get(__be32 clusterip, int entry)
 {
 	struct clusterip_config *c;
 
@@ -166,7 +166,7 @@ clusterip_config_init_nodelist(struct clusterip_config *c,
 }
 
 static struct clusterip_config *
-clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
+clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip,
 			struct net_device *dev)
 {
 	struct clusterip_config *c;
@@ -387,7 +387,7 @@ checkentry(const char *tablename,
 		return 0;
 
 	}
-	if (e->ip.dmsk.s_addr != 0xffffffff
+	if (e->ip.dmsk.s_addr != htonl(0xffffffff)
 	    || e->ip.dst.s_addr == 0) {
 		printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n");
 		return 0;
@@ -476,9 +476,9 @@ static struct ipt_target clusterip_tgt = {
 /* hardcoded for 48bit ethernet and 32bit ipv4 addresses */
 struct arp_payload {
 	u_int8_t src_hw[ETH_ALEN];
-	u_int32_t src_ip;
+	__be32 src_ip;
 	u_int8_t dst_hw[ETH_ALEN];
-	u_int32_t dst_ip;
+	__be32 dst_ip;
 } __attribute__ ((packed));
 
 #ifdef CLUSTERIP_DEBUG
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 23f9c7ebe7eb..12a818a2462f 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -28,7 +28,7 @@ static inline int
 set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 {
 	struct iphdr *iph = (*pskb)->nh.iph;
-	u_int16_t oldtos;
+	__be16 oldtos;
 
 	if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
 		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
@@ -37,7 +37,7 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 		oldtos = iph->tos;
 		iph->tos &= ~IPT_ECN_IP_MASK;
 		iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
-		iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos,
+		iph->check = nf_csum_update(oldtos ^ htons(0xFFFF), iph->tos,
 					    iph->check);
 	} 
 	return 1;
@@ -48,7 +48,7 @@ static inline int
 set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 {
 	struct tcphdr _tcph, *tcph;
-	u_int16_t oldval;
+	__be16 oldval;
 
 	/* Not enought header? */
 	tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
@@ -66,15 +66,15 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 		return 0;
 	tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
 
-	oldval = ((u_int16_t *)tcph)[6];
+	oldval = ((__be16 *)tcph)[6];
 	if (einfo->operation & IPT_ECN_OP_SET_ECE)
 		tcph->ece = einfo->proto.tcp.ece;
 	if (einfo->operation & IPT_ECN_OP_SET_CWR)
 		tcph->cwr = einfo->proto.tcp.cwr;
 
 	tcph->check = nf_proto_csum_update((*pskb),
-					   oldval ^ 0xFFFF,
-					   ((u_int16_t *)tcph)[6],
+					   oldval ^ htons(0xFFFF),
+					   ((__be16 *)tcph)[6],
 					   tcph->check, 0);
 	return 1;
 }
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index beb2914225ff..58a88f227108 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -58,7 +58,7 @@ target(struct sk_buff **pskb,
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
-	u_int32_t new_ip, netmask;
+	__be32 new_ip, netmask;
 	const struct ip_nat_multi_range_compat *mr = targinfo;
 	struct ip_nat_range newrange;
 
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index b81821edd893..fd0c05efed8a 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -104,8 +104,8 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	struct iphdr *iph = oldskb->nh.iph;
 	struct tcphdr _otcph, *oth, *tcph;
 	struct rtable *rt;
-	u_int16_t tmp_port;
-	u_int32_t tmp_addr;
+	__be16 tmp_port;
+	__be32 tmp_addr;
 	int needs_ack;
 	int hh_len;
 
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index efbcb1198832..b38b13328d73 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -135,7 +135,8 @@ same_target(struct sk_buff **pskb,
 {
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
-	u_int32_t tmpip, aindex, new_ip;
+	u_int32_t tmpip, aindex;
+	__be32 new_ip;
 	const struct ipt_same_info *same = targinfo;
 	struct ip_nat_range newrange;
 	const struct ip_conntrack_tuple *t;
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index 4246c4321e5b..108b6b76311f 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -42,7 +42,8 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 	const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
 	struct tcphdr *tcph;
 	struct iphdr *iph;
-	u_int16_t tcplen, newtotlen, oldval, newmss;
+	u_int16_t tcplen, newmss;
+	__be16 newtotlen, oldval;
 	unsigned int i;
 	u_int8_t *opt;
 
@@ -97,7 +98,7 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 			opt[i+3] = (newmss & 0x00ff);
 
 			tcph->check = nf_proto_csum_update(*pskb,
-							   htons(oldmss)^0xFFFF,
+							   htons(oldmss)^htons(0xFFFF),
 							   htons(newmss),
 							   tcph->check, 0);
 			return IPT_CONTINUE;
@@ -126,7 +127,7 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
 
 	tcph->check = nf_proto_csum_update(*pskb,
-					   htons(tcplen) ^ 0xFFFF,
+					   htons(tcplen) ^ htons(0xFFFF),
 				           htons(tcplen + TCPOLEN_MSS),
 					   tcph->check, 1);
 	opt[0] = TCPOPT_MSS;
@@ -134,18 +135,18 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 	opt[2] = (newmss & 0xff00) >> 8;
 	opt[3] = (newmss & 0x00ff);
 
-	tcph->check = nf_proto_csum_update(*pskb, ~0, *((u_int32_t *)opt),
+	tcph->check = nf_proto_csum_update(*pskb, htonl(~0), *((__be32 *)opt),
 					   tcph->check, 0);
 
-	oldval = ((u_int16_t *)tcph)[6];
+	oldval = ((__be16 *)tcph)[6];
 	tcph->doff += TCPOLEN_MSS/4;
 	tcph->check = nf_proto_csum_update(*pskb,
-					   oldval ^ 0xFFFF,
-					   ((u_int16_t *)tcph)[6],
+					   oldval ^ htons(0xFFFF),
+					   ((__be16 *)tcph)[6],
 					   tcph->check, 0);
 
 	newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS);
-	iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF,
+	iph->check = nf_csum_update(iph->tot_len ^ htons(0xFFFF),
 				    newtotlen, iph->check);
 	iph->tot_len = newtotlen;
 	return IPT_CONTINUE;
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 471a4c438b0a..6b8b14ccc3d3 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -30,7 +30,7 @@ target(struct sk_buff **pskb,
 {
 	const struct ipt_tos_target_info *tosinfo = targinfo;
 	struct iphdr *iph = (*pskb)->nh.iph;
-	u_int16_t oldtos;
+	__be16 oldtos;
 
 	if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
 		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
@@ -38,7 +38,7 @@ target(struct sk_buff **pskb,
 		iph = (*pskb)->nh.iph;
 		oldtos = iph->tos;
 		iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
-		iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos,
+		iph->check = nf_csum_update(oldtos ^ htons(0xFFFF), iph->tos,
 					    iph->check);
 	}
 	return IPT_CONTINUE;
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 96e79cc6d0f2..ac9517d62af0 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -54,8 +54,8 @@ ipt_ttl_target(struct sk_buff **pskb,
 	}
 
 	if (new_ttl != iph->ttl) {
-		iph->check = nf_csum_update(ntohs((iph->ttl << 8)) ^ 0xFFFF,
-					    ntohs(new_ttl << 8),
+		iph->check = nf_csum_update(htons((iph->ttl << 8)) ^ htons(0xFFFF),
+					    htons(new_ttl << 8),
 					    iph->check);
 		iph->ttl = new_ttl;
 	}
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 4f73a61aa3dd..33ccdbf8e794 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -50,11 +50,11 @@ static struct file_operations dl_file_ops;
 /* hash table crap */
 
 struct dsthash_dst {
-	u_int32_t src_ip;
-	u_int32_t dst_ip;
+	__be32 src_ip;
+	__be32 dst_ip;
 	/* ports have to be consecutive !!! */
-	u_int16_t src_port;
-	u_int16_t dst_port;
+	__be16 src_port;
+	__be16 dst_port;
 };
 
 struct dsthash_ent {
@@ -106,8 +106,10 @@ static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b)
 static inline u_int32_t
 hash_dst(const struct ipt_hashlimit_htable *ht, const struct dsthash_dst *dst)
 {
-	return (jhash_3words(dst->dst_ip, (dst->dst_port<<16 | dst->src_port), 
-			     dst->src_ip, ht->rnd) % ht->cfg.size);
+	return (jhash_3words((__force u32)dst->dst_ip,
+			    ((__force u32)dst->dst_port<<16 |
+			     (__force u32)dst->src_port),
+			     (__force u32)dst->src_ip, ht->rnd) % ht->cfg.size);
 }
 
 static inline struct dsthash_ent *
@@ -406,7 +408,7 @@ hashlimit_match(const struct sk_buff *skb,
 		dst.src_ip = skb->nh.iph->saddr;
 	if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT
 	    ||hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT) {
-		u_int16_t _ports[2], *ports;
+		__be16 _ports[2], *ports;
 
 		switch (skb->nh.iph->protocol) {
 		case IPPROTO_TCP:
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 32ae8d7ac506..126db44e71a8 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -50,11 +50,10 @@ MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files");
 MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files");
 MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files");
 
-
 struct recent_entry {
 	struct list_head	list;
 	struct list_head	lru_list;
-	u_int32_t		addr;
+	__be32			addr;
 	u_int8_t		ttl;
 	u_int8_t		index;
 	u_int16_t		nstamps;
@@ -85,17 +84,17 @@ static struct file_operations	recent_fops;
 static u_int32_t hash_rnd;
 static int hash_rnd_initted;
 
-static unsigned int recent_entry_hash(u_int32_t addr)
+static unsigned int recent_entry_hash(__be32 addr)
 {
 	if (!hash_rnd_initted) {
 		get_random_bytes(&hash_rnd, 4);
 		hash_rnd_initted = 1;
 	}
-	return jhash_1word(addr, hash_rnd) & (ip_list_hash_size - 1);
+	return jhash_1word((__force u32)addr, hash_rnd) & (ip_list_hash_size - 1);
 }
 
 static struct recent_entry *
-recent_entry_lookup(const struct recent_table *table, u_int32_t addr, u_int8_t ttl)
+recent_entry_lookup(const struct recent_table *table, __be32 addr, u_int8_t ttl)
 {
 	struct recent_entry *e;
 	unsigned int h;
@@ -116,7 +115,7 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
 }
 
 static struct recent_entry *
-recent_entry_init(struct recent_table *t, u_int32_t addr, u_int8_t ttl)
+recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl)
 {
 	struct recent_entry *e;
 
@@ -178,7 +177,7 @@ ipt_recent_match(const struct sk_buff *skb,
 	const struct ipt_recent_info *info = matchinfo;
 	struct recent_table *t;
 	struct recent_entry *e;
-	u_int32_t addr;
+	__be32 addr;
 	u_int8_t ttl;
 	int ret = info->invert;
 
@@ -406,7 +405,7 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input,
 	struct recent_table *t = pde->data;
 	struct recent_entry *e;
 	char buf[sizeof("+255.255.255.255")], *c = buf;
-	u_int32_t addr;
+	__be32 addr;
 	int add;
 
 	if (size > sizeof(buf))
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 79336cb42527..e62ea2bb9c0a 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -131,7 +131,7 @@ ipt_local_hook(unsigned int hook,
 {
 	unsigned int ret;
 	u_int8_t tos;
-	u_int32_t saddr, daddr;
+	__be32 saddr, daddr;
 	unsigned long nfmark;
 
 	/* root is playing with raw sockets. */
-- 
cgit v1.2.3


From d4263cde88d3fee2af0aac8836bb785cdb6b06c0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 28 Sep 2006 14:22:51 -0700
Subject: [NETFILTER]: h323 annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv4/ip_conntrack_h323.h |  6 +-
 net/ipv4/netfilter/ip_conntrack_helper_h323.c    | 84 ++++++++++++------------
 net/ipv4/netfilter/ip_nat_helper_h323.c          | 16 ++---
 3 files changed, 53 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4/ip_conntrack_h323.h b/include/linux/netfilter_ipv4/ip_conntrack_h323.h
index 3cbff7379002..943cc6a4871d 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_h323.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_h323.h
@@ -30,7 +30,7 @@ struct ip_ct_h323_master {
 struct ip_conntrack_expect;
 
 extern int get_h225_addr(unsigned char *data, TransportAddress * addr,
-			 u_int32_t * ip, u_int16_t * port);
+			 __be32 * ip, u_int16_t * port);
 extern void ip_conntrack_h245_expect(struct ip_conntrack *new,
 				     struct ip_conntrack_expect *this);
 extern void ip_conntrack_q931_expect(struct ip_conntrack *new,
@@ -38,11 +38,11 @@ extern void ip_conntrack_q931_expect(struct ip_conntrack *new,
 extern int (*set_h245_addr_hook) (struct sk_buff ** pskb,
 				  unsigned char **data, int dataoff,
 				  H245_TransportAddress * addr,
-				  u_int32_t ip, u_int16_t port);
+				  __be32 ip, u_int16_t port);
 extern int (*set_h225_addr_hook) (struct sk_buff ** pskb,
 				  unsigned char **data, int dataoff,
 				  TransportAddress * addr,
-				  u_int32_t ip, u_int16_t port);
+				  __be32 ip, u_int16_t port);
 extern int (*set_sig_addr_hook) (struct sk_buff ** pskb,
 				 struct ip_conntrack * ct,
 				 enum ip_conntrack_info ctinfo,
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
index 9a39e2969712..7b7441202bfd 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
@@ -49,11 +49,11 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
 int (*set_h245_addr_hook) (struct sk_buff ** pskb,
 			   unsigned char **data, int dataoff,
 			   H245_TransportAddress * addr,
-			   u_int32_t ip, u_int16_t port);
+			   __be32 ip, u_int16_t port);
 int (*set_h225_addr_hook) (struct sk_buff ** pskb,
 			   unsigned char **data, int dataoff,
 			   TransportAddress * addr,
-			   u_int32_t ip, u_int16_t port);
+			   __be32 ip, u_int16_t port);
 int (*set_sig_addr_hook) (struct sk_buff ** pskb,
 			  struct ip_conntrack * ct,
 			  enum ip_conntrack_info ctinfo,
@@ -209,7 +209,7 @@ static int get_tpkt_data(struct sk_buff **pskb, struct ip_conntrack *ct,
 
 /****************************************************************************/
 static int get_h245_addr(unsigned char *data, H245_TransportAddress * addr,
-			 u_int32_t * ip, u_int16_t * port)
+			 __be32 * ip, u_int16_t * port)
 {
 	unsigned char *p;
 
@@ -232,7 +232,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
 {
 	int dir = CTINFO2DIR(ctinfo);
 	int ret = 0;
-	u_int32_t ip;
+	__be32 ip;
 	u_int16_t port;
 	u_int16_t rtp_port;
 	struct ip_conntrack_expect *rtp_exp;
@@ -254,10 +254,10 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
 	rtp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
 	rtp_exp->tuple.dst.u.udp.port = htons(rtp_port);
 	rtp_exp->tuple.dst.protonum = IPPROTO_UDP;
-	rtp_exp->mask.src.ip = 0xFFFFFFFF;
+	rtp_exp->mask.src.ip = htonl(0xFFFFFFFF);
 	rtp_exp->mask.src.u.udp.port = 0;
-	rtp_exp->mask.dst.ip = 0xFFFFFFFF;
-	rtp_exp->mask.dst.u.udp.port = 0xFFFF;
+	rtp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
+	rtp_exp->mask.dst.u.udp.port = htons(0xFFFF);
 	rtp_exp->mask.dst.protonum = 0xFF;
 	rtp_exp->flags = 0;
 
@@ -271,10 +271,10 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
 	rtcp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
 	rtcp_exp->tuple.dst.u.udp.port = htons(rtp_port + 1);
 	rtcp_exp->tuple.dst.protonum = IPPROTO_UDP;
-	rtcp_exp->mask.src.ip = 0xFFFFFFFF;
+	rtcp_exp->mask.src.ip = htonl(0xFFFFFFFF);
 	rtcp_exp->mask.src.u.udp.port = 0;
-	rtcp_exp->mask.dst.ip = 0xFFFFFFFF;
-	rtcp_exp->mask.dst.u.udp.port = 0xFFFF;
+	rtcp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
+	rtcp_exp->mask.dst.u.udp.port = htons(0xFFFF);
 	rtcp_exp->mask.dst.protonum = 0xFF;
 	rtcp_exp->flags = 0;
 
@@ -325,7 +325,7 @@ static int expect_t120(struct sk_buff **pskb,
 {
 	int dir = CTINFO2DIR(ctinfo);
 	int ret = 0;
-	u_int32_t ip;
+	__be32 ip;
 	u_int16_t port;
 	struct ip_conntrack_expect *exp = NULL;
 
@@ -342,10 +342,10 @@ static int expect_t120(struct sk_buff **pskb,
 	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
 	exp->tuple.dst.u.tcp.port = htons(port);
 	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = 0xFFFFFFFF;
+	exp->mask.src.ip = htonl(0xFFFFFFFF);
 	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = 0xFFFFFFFF;
-	exp->mask.dst.u.tcp.port = 0xFFFF;
+	exp->mask.dst.ip = htonl(0xFFFFFFFF);
+	exp->mask.dst.u.tcp.port = htons(0xFFFF);
 	exp->mask.dst.protonum = 0xFF;
 	exp->flags = IP_CT_EXPECT_PERMANENT;	/* Accept multiple channels */
 
@@ -626,7 +626,7 @@ void ip_conntrack_h245_expect(struct ip_conntrack *new,
 
 /****************************************************************************/
 int get_h225_addr(unsigned char *data, TransportAddress * addr,
-		  u_int32_t * ip, u_int16_t * port)
+		  __be32 * ip, u_int16_t * port)
 {
 	unsigned char *p;
 
@@ -648,7 +648,7 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
 {
 	int dir = CTINFO2DIR(ctinfo);
 	int ret = 0;
-	u_int32_t ip;
+	__be32 ip;
 	u_int16_t port;
 	struct ip_conntrack_expect *exp = NULL;
 
@@ -665,10 +665,10 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
 	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
 	exp->tuple.dst.u.tcp.port = htons(port);
 	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = 0xFFFFFFFF;
+	exp->mask.src.ip = htonl(0xFFFFFFFF);
 	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = 0xFFFFFFFF;
-	exp->mask.dst.u.tcp.port = 0xFFFF;
+	exp->mask.dst.ip = htonl(0xFFFFFFFF);
+	exp->mask.dst.u.tcp.port = htons(0xFFFF);
 	exp->mask.dst.protonum = 0xFF;
 	exp->flags = 0;
 
@@ -709,7 +709,7 @@ static int expect_callforwarding(struct sk_buff **pskb,
 {
 	int dir = CTINFO2DIR(ctinfo);
 	int ret = 0;
-	u_int32_t ip;
+	__be32 ip;
 	u_int16_t port;
 	struct ip_conntrack_expect *exp = NULL;
 
@@ -751,10 +751,10 @@ static int expect_callforwarding(struct sk_buff **pskb,
 	exp->tuple.dst.ip = ip;
 	exp->tuple.dst.u.tcp.port = htons(port);
 	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = 0xFFFFFFFF;
+	exp->mask.src.ip = htonl(0xFFFFFFFF);
 	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = 0xFFFFFFFF;
-	exp->mask.dst.u.tcp.port = 0xFFFF;
+	exp->mask.dst.ip = htonl(0xFFFFFFFF);
+	exp->mask.dst.u.tcp.port = htons(0xFFFF);
 	exp->mask.dst.protonum = 0xFF;
 	exp->flags = 0;
 
@@ -791,7 +791,7 @@ static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct,
 	int dir = CTINFO2DIR(ctinfo);
 	int ret;
 	int i;
-	u_int32_t ip;
+	__be32 ip;
 	u_int16_t port;
 
 	DEBUGP("ip_ct_q931: Setup\n");
@@ -1188,7 +1188,7 @@ static unsigned char *get_udp_data(struct sk_buff **pskb, int *datalen)
 
 /****************************************************************************/
 static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct,
-					       u_int32_t ip, u_int16_t port)
+					       __be32 ip, u_int16_t port)
 {
 	struct ip_conntrack_expect *exp;
 	struct ip_conntrack_tuple tuple;
@@ -1228,7 +1228,7 @@ static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
 	int dir = CTINFO2DIR(ctinfo);
 	int ret = 0;
 	int i;
-	u_int32_t ip;
+	__be32 ip;
 	u_int16_t port;
 	struct ip_conntrack_expect *exp;
 
@@ -1251,10 +1251,10 @@ static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
 	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
 	exp->tuple.dst.u.tcp.port = htons(port);
 	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = gkrouted_only ? 0xFFFFFFFF : 0;
+	exp->mask.src.ip = gkrouted_only ? htonl(0xFFFFFFFF) : 0;
 	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = 0xFFFFFFFF;
-	exp->mask.dst.u.tcp.port = 0xFFFF;
+	exp->mask.dst.ip = htonl(0xFFFFFFFF);
+	exp->mask.dst.u.tcp.port = htons(0xFFFF);
 	exp->mask.dst.protonum = 0xFF;
 	exp->flags = IP_CT_EXPECT_PERMANENT;	/* Accept multiple calls */
 
@@ -1307,7 +1307,7 @@ static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct,
 {
 	int dir = CTINFO2DIR(ctinfo);
 	int ret = 0;
-	u_int32_t ip;
+	__be32 ip;
 	u_int16_t port;
 	struct ip_conntrack_expect *exp;
 
@@ -1333,10 +1333,10 @@ static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct,
 	exp->tuple.dst.ip = ip;
 	exp->tuple.dst.u.tcp.port = htons(port);
 	exp->tuple.dst.protonum = IPPROTO_UDP;
-	exp->mask.src.ip = 0xFFFFFFFF;
+	exp->mask.src.ip = htonl(0xFFFFFFFF);
 	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = 0xFFFFFFFF;
-	exp->mask.dst.u.tcp.port = 0xFFFF;
+	exp->mask.dst.ip = htonl(0xFFFFFFFF);
+	exp->mask.dst.u.tcp.port = htons(0xFFFF);
 	exp->mask.dst.protonum = 0xFF;
 	exp->flags = 0;
 	exp->expectfn = ip_conntrack_ras_expect;
@@ -1477,7 +1477,7 @@ static int process_arq(struct sk_buff **pskb, struct ip_conntrack *ct,
 {
 	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
 	int dir = CTINFO2DIR(ctinfo);
-	u_int32_t ip;
+	__be32 ip;
 	u_int16_t port;
 
 	DEBUGP("ip_ct_ras: ARQ\n");
@@ -1513,7 +1513,7 @@ static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct,
 {
 	int dir = CTINFO2DIR(ctinfo);
 	int ret = 0;
-	u_int32_t ip;
+	__be32 ip;
 	u_int16_t port;
 	struct ip_conntrack_expect *exp;
 
@@ -1538,10 +1538,10 @@ static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct,
 	exp->tuple.dst.ip = ip;
 	exp->tuple.dst.u.tcp.port = htons(port);
 	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = 0xFFFFFFFF;
+	exp->mask.src.ip = htonl(0xFFFFFFFF);
 	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = 0xFFFFFFFF;
-	exp->mask.dst.u.tcp.port = 0xFFFF;
+	exp->mask.dst.ip = htonl(0xFFFFFFFF);
+	exp->mask.dst.u.tcp.port = htons(0xFFFF);
 	exp->mask.dst.protonum = 0xFF;
 	exp->flags = IP_CT_EXPECT_PERMANENT;
 	exp->expectfn = ip_conntrack_q931_expect;
@@ -1581,7 +1581,7 @@ static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct,
 {
 	int dir = CTINFO2DIR(ctinfo);
 	int ret = 0;
-	u_int32_t ip;
+	__be32 ip;
 	u_int16_t port;
 	struct ip_conntrack_expect *exp = NULL;
 
@@ -1598,10 +1598,10 @@ static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct,
 	exp->tuple.dst.ip = ip;
 	exp->tuple.dst.u.tcp.port = htons(port);
 	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = 0xFFFFFFFF;
+	exp->mask.src.ip = htonl(0xFFFFFFFF);
 	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = 0xFFFFFFFF;
-	exp->mask.dst.u.tcp.port = 0xFFFF;
+	exp->mask.dst.ip = htonl(0xFFFFFFFF);
+	exp->mask.dst.u.tcp.port = htons(0xFFFF);
 	exp->mask.dst.protonum = 0xFF;
 	exp->flags = IP_CT_EXPECT_PERMANENT;
 	exp->expectfn = ip_conntrack_q931_expect;
diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c
index 419b878fb467..4a7d34466ee2 100644
--- a/net/ipv4/netfilter/ip_nat_helper_h323.c
+++ b/net/ipv4/netfilter/ip_nat_helper_h323.c
@@ -32,13 +32,13 @@
 /****************************************************************************/
 static int set_addr(struct sk_buff **pskb,
 		    unsigned char **data, int dataoff,
-		    unsigned int addroff, u_int32_t ip, u_int16_t port)
+		    unsigned int addroff, __be32 ip, u_int16_t port)
 {
 	enum ip_conntrack_info ctinfo;
 	struct ip_conntrack *ct = ip_conntrack_get(*pskb, &ctinfo);
 	struct {
-		u_int32_t ip;
-		u_int16_t port;
+		__be32 ip;
+		__be16 port;
 	} __attribute__ ((__packed__)) buf;
 	struct tcphdr _tcph, *th;
 
@@ -86,7 +86,7 @@ static int set_addr(struct sk_buff **pskb,
 static int set_h225_addr(struct sk_buff **pskb,
 			 unsigned char **data, int dataoff,
 			 TransportAddress * addr,
-			 u_int32_t ip, u_int16_t port)
+			 __be32 ip, u_int16_t port)
 {
 	return set_addr(pskb, data, dataoff, addr->ipAddress.ip, ip, port);
 }
@@ -95,7 +95,7 @@ static int set_h225_addr(struct sk_buff **pskb,
 static int set_h245_addr(struct sk_buff **pskb,
 			 unsigned char **data, int dataoff,
 			 H245_TransportAddress * addr,
-			 u_int32_t ip, u_int16_t port)
+			 __be32 ip, u_int16_t port)
 {
 	return set_addr(pskb, data, dataoff,
 			addr->unicastAddress.iPAddress.network, ip, port);
@@ -110,7 +110,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
 	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
 	int dir = CTINFO2DIR(ctinfo);
 	int i;
-	u_int32_t ip;
+	__be32 ip;
 	u_int16_t port;
 
 	for (i = 0; i < count; i++) {
@@ -164,7 +164,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
 {
 	int dir = CTINFO2DIR(ctinfo);
 	int i;
-	u_int32_t ip;
+	__be32 ip;
 	u_int16_t port;
 
 	for (i = 0; i < count; i++) {
@@ -433,7 +433,7 @@ static int nat_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
 	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
 	int dir = CTINFO2DIR(ctinfo);
 	u_int16_t nated_port = port;
-	u_int32_t ip;
+	__be32 ip;
 
 	/* Set expectations for NAT */
 	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-- 
cgit v1.2.3


From 32f50cdee666333168b5203c7864bede159f789e Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Thu, 28 Sep 2006 14:51:47 -0700
Subject: [NetLabel]: add audit support for configuration changes

This patch adds audit support to NetLabel, including six new audit message
types shown below.

 #define AUDIT_MAC_UNLBL_ACCEPT 1406
 #define AUDIT_MAC_UNLBL_DENY   1407
 #define AUDIT_MAC_CIPSOV4_ADD  1408
 #define AUDIT_MAC_CIPSOV4_DEL  1409
 #define AUDIT_MAC_MAP_ADD      1410
 #define AUDIT_MAC_MAP_DEL      1411

Signed-off-by: Paul Moore <paul.moore@hp.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/audit.h              |  6 +++
 include/net/cipso_ipv4.h           |  5 ++-
 include/net/netlabel.h             |  2 +-
 net/ipv4/cipso_ipv4.c              |  8 +++-
 net/netlabel/netlabel_cipso_v4.c   | 43 +++++++++++++-----
 net/netlabel/netlabel_domainhash.c | 54 +++++++++++++++++++---
 net/netlabel/netlabel_domainhash.h |  6 +--
 net/netlabel/netlabel_mgmt.c       | 14 +++---
 net/netlabel/netlabel_unlabeled.c  | 36 ++++++++++++---
 net/netlabel/netlabel_user.c       | 91 ++++++++++++++++++++++++++++++++++++++
 net/netlabel/netlabel_user.h       |  6 +++
 11 files changed, 235 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 40a6c26294ae..42719d07612a 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -95,6 +95,12 @@
 #define AUDIT_MAC_POLICY_LOAD	1403	/* Policy file load */
 #define AUDIT_MAC_STATUS	1404	/* Changed enforcing,permissive,off */
 #define AUDIT_MAC_CONFIG_CHANGE	1405	/* Changes to booleans */
+#define AUDIT_MAC_UNLBL_ACCEPT	1406	/* NetLabel: allow unlabeled traffic */
+#define AUDIT_MAC_UNLBL_DENY	1407	/* NetLabel: deny unlabeled traffic */
+#define AUDIT_MAC_CIPSOV4_ADD	1408	/* NetLabel: add CIPSOv4 DOI entry */
+#define AUDIT_MAC_CIPSOV4_DEL	1409	/* NetLabel: del CIPSOv4 DOI entry */
+#define AUDIT_MAC_MAP_ADD	1410	/* NetLabel: add LSM domain mapping */
+#define AUDIT_MAC_MAP_DEL	1411	/* NetLabel: del LSM domain mapping */
 
 #define AUDIT_FIRST_KERN_ANOM_MSG   1700
 #define AUDIT_LAST_KERN_ANOM_MSG    1799
diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 2d72496c2029..5d6ae1b2b196 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -128,7 +128,9 @@ extern int cipso_v4_rbm_strictvalid;
 
 #ifdef CONFIG_NETLABEL
 int cipso_v4_doi_add(struct cipso_v4_doi *doi_def);
-int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head));
+int cipso_v4_doi_remove(u32 doi,
+			u32 audit_secid,
+			void (*callback) (struct rcu_head * head));
 struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi);
 int cipso_v4_doi_walk(u32 *skip_cnt,
 		     int (*callback) (struct cipso_v4_doi *doi_def, void *arg),
@@ -143,6 +145,7 @@ static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
 }
 
 static inline int cipso_v4_doi_remove(u32 doi,
+				    u32 audit_secid,
 				    void (*callback) (struct rcu_head * head))
 {
 	return 0;
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 6692430063fd..190bfdbbdba6 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -96,7 +96,7 @@
 struct netlbl_dom_map;
 
 /* Domain mapping operations */
-int netlbl_domhsh_remove(const char *domain);
+int netlbl_domhsh_remove(const char *domain, u32 audit_secid);
 
 /* LSM security attributes */
 struct netlbl_lsm_cache {
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index e6ce0b3ba62a..c4e469ff842d 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -474,6 +474,7 @@ doi_add_failure_rlock:
 /**
  * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine
  * @doi: the DOI value
+ * @audit_secid: the LSM secid to use in the audit message
  * @callback: the DOI cleanup/free callback
  *
  * Description:
@@ -483,7 +484,9 @@ doi_add_failure_rlock:
  * success and negative values on failure.
  *
  */
-int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head))
+int cipso_v4_doi_remove(u32 doi,
+			u32 audit_secid,
+			void (*callback) (struct rcu_head * head))
 {
 	struct cipso_v4_doi *doi_def;
 	struct cipso_v4_domhsh_entry *dom_iter;
@@ -502,7 +505,8 @@ int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head))
 		spin_unlock(&cipso_v4_doi_list_lock);
 		list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
 			if (dom_iter->valid)
-				netlbl_domhsh_remove(dom_iter->domain);
+				netlbl_domhsh_remove(dom_iter->domain,
+						     audit_secid);
 		cipso_v4_cache_invalidate();
 		rcu_read_unlock();
 
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 4125a55f469f..09986ca962a6 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -32,6 +32,7 @@
 #include <linux/socket.h>
 #include <linux/string.h>
 #include <linux/skbuff.h>
+#include <linux/audit.h>
 #include <net/sock.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
@@ -162,8 +163,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info)
 	int nla_a_rem;
 	int nla_b_rem;
 
-	if (!info->attrs[NLBL_CIPSOV4_A_DOI] ||
-	    !info->attrs[NLBL_CIPSOV4_A_TAGLST] ||
+	if (!info->attrs[NLBL_CIPSOV4_A_TAGLST] ||
 	    !info->attrs[NLBL_CIPSOV4_A_MLSLVLLST])
 		return -EINVAL;
 
@@ -344,8 +344,7 @@ static int netlbl_cipsov4_add_pass(struct genl_info *info)
 	int ret_val;
 	struct cipso_v4_doi *doi_def = NULL;
 
-	if (!info->attrs[NLBL_CIPSOV4_A_DOI] ||
-	    !info->attrs[NLBL_CIPSOV4_A_TAGLST])
+	if (!info->attrs[NLBL_CIPSOV4_A_TAGLST])
 		return -EINVAL;
 
 	doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL);
@@ -381,21 +380,35 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
 
 {
 	int ret_val = -EINVAL;
-	u32 map_type;
+	u32 type;
+	u32 doi;
+	const char *type_str = "(unknown)";
+	struct audit_buffer *audit_buf;
 
-	if (!info->attrs[NLBL_CIPSOV4_A_MTYPE])
+	if (!info->attrs[NLBL_CIPSOV4_A_DOI] ||
+	    !info->attrs[NLBL_CIPSOV4_A_MTYPE])
 		return -EINVAL;
 
-	map_type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]);
-	switch (map_type) {
+	type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]);
+	switch (type) {
 	case CIPSO_V4_MAP_STD:
+		type_str = "std";
 		ret_val = netlbl_cipsov4_add_std(info);
 		break;
 	case CIPSO_V4_MAP_PASS:
+		type_str = "pass";
 		ret_val = netlbl_cipsov4_add_pass(info);
 		break;
 	}
 
+	if (ret_val == 0) {
+		doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
+		audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
+						      NETLINK_CB(skb).sid);
+		audit_log_format(audit_buf, " doi=%u type=%s", doi, type_str);
+		audit_log_end(audit_buf);
+	}
+
 	return ret_val;
 }
 
@@ -653,11 +666,21 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb,
 static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
 {
 	int ret_val = -EINVAL;
-	u32 doi;
+	u32 doi = 0;
+	struct audit_buffer *audit_buf;
 
 	if (info->attrs[NLBL_CIPSOV4_A_DOI]) {
 		doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
-		ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free);
+		ret_val = cipso_v4_doi_remove(doi,
+					      NETLINK_CB(skb).sid,
+					      netlbl_cipsov4_doi_free);
+	}
+
+	if (ret_val == 0) {
+		audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL,
+						      NETLINK_CB(skb).sid);
+		audit_log_format(audit_buf, " doi=%u", doi);
+		audit_log_end(audit_buf);
 	}
 
 	return ret_val;
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index f56d7a8ac7b7..d64e2ae3b129 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -35,12 +35,14 @@
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
+#include <linux/audit.h>
 #include <net/netlabel.h>
 #include <net/cipso_ipv4.h>
 #include <asm/bug.h>
 
 #include "netlabel_mgmt.h"
 #include "netlabel_domainhash.h"
+#include "netlabel_user.h"
 
 struct netlbl_domhsh_tbl {
 	struct list_head *tbl;
@@ -186,6 +188,7 @@ int netlbl_domhsh_init(u32 size)
 /**
  * netlbl_domhsh_add - Adds a entry to the domain hash table
  * @entry: the entry to add
+ * @audit_secid: the LSM secid to use in the audit message
  *
  * Description:
  * Adds a new entry to the domain hash table and handles any updates to the
@@ -193,10 +196,12 @@ int netlbl_domhsh_init(u32 size)
  * negative on failure.
  *
  */
-int netlbl_domhsh_add(struct netlbl_dom_map *entry)
+int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid)
 {
 	int ret_val;
 	u32 bkt;
+	struct audit_buffer *audit_buf;
+	char *audit_domain;
 
 	switch (entry->type) {
 	case NETLBL_NLTYPE_UNLABELED:
@@ -236,6 +241,26 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry)
 		spin_unlock(&netlbl_domhsh_def_lock);
 	} else
 		ret_val = -EINVAL;
+	if (ret_val == 0) {
+		if (entry->domain != NULL)
+			audit_domain = entry->domain;
+		else
+			audit_domain = "(default)";
+		audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD,
+						      audit_secid);
+		audit_log_format(audit_buf, " domain=%s", audit_domain);
+		switch (entry->type) {
+		case NETLBL_NLTYPE_UNLABELED:
+			audit_log_format(audit_buf, " protocol=unlbl");
+			break;
+		case NETLBL_NLTYPE_CIPSOV4:
+			audit_log_format(audit_buf,
+					 " protocol=cipsov4 doi=%u",
+					 entry->type_def.cipsov4->doi);
+			break;
+		}
+		audit_log_end(audit_buf);
+	}
 	rcu_read_unlock();
 
 	if (ret_val != 0) {
@@ -254,6 +279,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry)
 /**
  * netlbl_domhsh_add_default - Adds the default entry to the domain hash table
  * @entry: the entry to add
+ * @audit_secid: the LSM secid to use in the audit message
  *
  * Description:
  * Adds a new default entry to the domain hash table and handles any updates
@@ -261,14 +287,15 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry)
  * negative on failure.
  *
  */
-int netlbl_domhsh_add_default(struct netlbl_dom_map *entry)
+int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid)
 {
-	return netlbl_domhsh_add(entry);
+	return netlbl_domhsh_add(entry, audit_secid);
 }
 
 /**
  * netlbl_domhsh_remove - Removes an entry from the domain hash table
  * @domain: the domain to remove
+ * @audit_secid: the LSM secid to use in the audit message
  *
  * Description:
  * Removes an entry from the domain hash table and handles any updates to the
@@ -276,10 +303,12 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry)
  * negative on failure.
  *
  */
-int netlbl_domhsh_remove(const char *domain)
+int netlbl_domhsh_remove(const char *domain, u32 audit_secid)
 {
 	int ret_val = -ENOENT;
 	struct netlbl_dom_map *entry;
+	struct audit_buffer *audit_buf;
+	char *audit_domain;
 
 	rcu_read_lock();
 	if (domain != NULL)
@@ -316,8 +345,18 @@ int netlbl_domhsh_remove(const char *domain)
 			ret_val = -ENOENT;
 		spin_unlock(&netlbl_domhsh_def_lock);
 	}
-	if (ret_val == 0)
+	if (ret_val == 0) {
+		if (entry->domain != NULL)
+			audit_domain = entry->domain;
+		else
+			audit_domain = "(default)";
+		audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL,
+						      audit_secid);
+		audit_log_format(audit_buf, " domain=%s", audit_domain);
+		audit_log_end(audit_buf);
+
 		call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
+	}
 
 remove_return:
 	rcu_read_unlock();
@@ -326,6 +365,7 @@ remove_return:
 
 /**
  * netlbl_domhsh_remove_default - Removes the default entry from the table
+ * @audit_secid: the LSM secid to use in the audit message
  *
  * Description:
  * Removes/resets the default entry for the domain hash table and handles any
@@ -333,9 +373,9 @@ remove_return:
  * success, non-zero on failure.
  *
  */
-int netlbl_domhsh_remove_default(void)
+int netlbl_domhsh_remove_default(u32 audit_secid)
 {
-	return netlbl_domhsh_remove(NULL);
+	return netlbl_domhsh_remove(NULL, audit_secid);
 }
 
 /**
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 02af72a7877c..d50f13cacdca 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -57,9 +57,9 @@ struct netlbl_dom_map {
 int netlbl_domhsh_init(u32 size);
 
 /* Manipulate the domain hash table */
-int netlbl_domhsh_add(struct netlbl_dom_map *entry);
-int netlbl_domhsh_add_default(struct netlbl_dom_map *entry);
-int netlbl_domhsh_remove_default(void);
+int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid);
+int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid);
+int netlbl_domhsh_remove_default(u32 audit_secid);
 struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
 int netlbl_domhsh_walk(u32 *skip_bkt,
 		     u32 *skip_chain,
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 8626c9f678eb..0ac314f18ad1 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -108,7 +108,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
 
 	switch (entry->type) {
 	case NETLBL_NLTYPE_UNLABELED:
-		ret_val = netlbl_domhsh_add(entry);
+		ret_val = netlbl_domhsh_add(entry, NETLINK_CB(skb).sid);
 		break;
 	case NETLBL_NLTYPE_CIPSOV4:
 		if (!info->attrs[NLBL_MGMT_A_CV4DOI])
@@ -125,7 +125,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
 			rcu_read_unlock();
 			goto add_failure;
 		}
-		ret_val = netlbl_domhsh_add(entry);
+		ret_val = netlbl_domhsh_add(entry, NETLINK_CB(skb).sid);
 		rcu_read_unlock();
 		break;
 	default:
@@ -161,7 +161,7 @@ static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info)
 		return -EINVAL;
 
 	domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]);
-	return netlbl_domhsh_remove(domain);
+	return netlbl_domhsh_remove(domain, NETLINK_CB(skb).sid);
 }
 
 /**
@@ -277,7 +277,8 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
 
 	switch (entry->type) {
 	case NETLBL_NLTYPE_UNLABELED:
-		ret_val = netlbl_domhsh_add_default(entry);
+		ret_val = netlbl_domhsh_add_default(entry,
+						    NETLINK_CB(skb).sid);
 		break;
 	case NETLBL_NLTYPE_CIPSOV4:
 		if (!info->attrs[NLBL_MGMT_A_CV4DOI])
@@ -294,7 +295,8 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
 			rcu_read_unlock();
 			goto adddef_failure;
 		}
-		ret_val = netlbl_domhsh_add_default(entry);
+		ret_val = netlbl_domhsh_add_default(entry,
+						    NETLINK_CB(skb).sid);
 		rcu_read_unlock();
 		break;
 	default:
@@ -322,7 +324,7 @@ adddef_failure:
  */
 static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info)
 {
-	return netlbl_domhsh_remove_default();
+	return netlbl_domhsh_remove_default(NETLINK_CB(skb).sid);
 }
 
 /**
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 440f5c4e1e2d..ab36675fee8c 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -63,6 +63,27 @@ static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = {
 	[NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 },
 };
 
+/*
+ * Helper Functions
+ */
+
+/**
+ * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag
+ * @value: desired value
+ * @audit_secid: the LSM secid to use in the audit message
+ *
+ * Description:
+ * Set the value of the unlabeled accept flag to @value.
+ *
+ */
+static void netlbl_unlabel_acceptflg_set(u8 value, u32 audit_secid)
+{
+	atomic_set(&netlabel_unlabel_accept_flg, value);
+	netlbl_audit_nomsg((value ?
+			    AUDIT_MAC_UNLBL_ACCEPT : AUDIT_MAC_UNLBL_DENY),
+			   audit_secid);
+}
+
 /*
  * NetLabel Command Handlers
  */
@@ -79,18 +100,18 @@ static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = {
  */
 static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info)
 {
-	int ret_val = -EINVAL;
 	u8 value;
 
 	if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) {
 		value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]);
 		if (value == 1 || value == 0) {
-			atomic_set(&netlabel_unlabel_accept_flg, value);
-			ret_val = 0;
+			netlbl_unlabel_acceptflg_set(value,
+						     NETLINK_CB(skb).sid);
+			return 0;
 		}
 	}
 
-	return ret_val;
+	return -EINVAL;
 }
 
 /**
@@ -229,16 +250,19 @@ int netlbl_unlabel_defconf(void)
 {
 	int ret_val;
 	struct netlbl_dom_map *entry;
+	u32 secid;
+
+	security_task_getsecid(current, &secid);
 
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (entry == NULL)
 		return -ENOMEM;
 	entry->type = NETLBL_NLTYPE_UNLABELED;
-	ret_val = netlbl_domhsh_add_default(entry);
+	ret_val = netlbl_domhsh_add_default(entry, secid);
 	if (ret_val != 0)
 		return ret_val;
 
-	atomic_set(&netlabel_unlabel_accept_flg, 1);
+	netlbl_unlabel_acceptflg_set(1, secid);
 
 	return 0;
 }
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index eeb7d768d2bb..c2343af584cb 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -32,6 +32,9 @@
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/socket.h>
+#include <linux/audit.h>
+#include <linux/tty.h>
+#include <linux/security.h>
 #include <net/sock.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
@@ -74,3 +77,91 @@ int netlbl_netlink_init(void)
 
 	return 0;
 }
+
+/*
+ * NetLabel Audit Functions
+ */
+
+/**
+ * netlbl_audit_start_common - Start an audit message
+ * @type: audit message type
+ * @secid: LSM context ID
+ *
+ * Description:
+ * Start an audit message using the type specified in @type and fill the audit
+ * message with some fields common to all NetLabel audit messages.  Returns
+ * a pointer to the audit buffer on success, NULL on failure.
+ *
+ */
+struct audit_buffer *netlbl_audit_start_common(int type, u32 secid)
+{
+	struct audit_context *audit_ctx = current->audit_context;
+	struct audit_buffer *audit_buf;
+	uid_t audit_loginuid;
+	const char *audit_tty;
+	char audit_comm[sizeof(current->comm)];
+	struct vm_area_struct *vma;
+	char *secctx;
+	u32 secctx_len;
+
+	audit_buf = audit_log_start(audit_ctx, GFP_ATOMIC, type);
+	if (audit_buf == NULL)
+		return NULL;
+
+	audit_loginuid = audit_get_loginuid(audit_ctx);
+	if (current->signal &&
+	    current->signal->tty &&
+	    current->signal->tty->name)
+		audit_tty = current->signal->tty->name;
+	else
+		audit_tty = "(none)";
+	get_task_comm(audit_comm, current);
+
+	audit_log_format(audit_buf,
+			 "netlabel: auid=%u uid=%u tty=%s pid=%d",
+			 audit_loginuid,
+			 current->uid,
+			 audit_tty,
+			 current->pid);
+	audit_log_format(audit_buf, " comm=");
+	audit_log_untrustedstring(audit_buf, audit_comm);
+	if (current->mm) {
+		down_read(&current->mm->mmap_sem);
+		vma = current->mm->mmap;
+		while (vma) {
+			if ((vma->vm_flags & VM_EXECUTABLE) &&
+			    vma->vm_file) {
+				audit_log_d_path(audit_buf,
+						 " exe=",
+						 vma->vm_file->f_dentry,
+						 vma->vm_file->f_vfsmnt);
+				break;
+			}
+			vma = vma->vm_next;
+		}
+		up_read(&current->mm->mmap_sem);
+	}
+
+	if (secid != 0 &&
+	    security_secid_to_secctx(secid, &secctx, &secctx_len) == 0)
+		audit_log_format(audit_buf, " subj=%s", secctx);
+
+	return audit_buf;
+}
+
+/**
+ * netlbl_audit_nomsg - Send an audit message without additional text
+ * @type: audit message type
+ * @secid: LSM context ID
+ *
+ * Description:
+ * Send an audit message with only the common NetLabel audit fields.
+ *
+ */
+void netlbl_audit_nomsg(int type, u32 secid)
+{
+	struct audit_buffer *audit_buf;
+
+	audit_buf = netlbl_audit_start_common(type, secid);
+	audit_log_end(audit_buf);
+}
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index 3f9386b917df..ab840acfc964 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -34,6 +34,7 @@
 #include <linux/types.h>
 #include <linux/skbuff.h>
 #include <linux/capability.h>
+#include <linux/audit.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
 #include <net/netlabel.h>
@@ -75,4 +76,9 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb,
 
 int netlbl_netlink_init(void);
 
+/* NetLabel Audit Functions */
+
+struct audit_buffer *netlbl_audit_start_common(int type, u32 secid);
+void netlbl_audit_nomsg(int type, u32 secid);
+
 #endif
-- 
cgit v1.2.3


From 0891a8d706d6e6838a926b6dec42f95581747d0e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Fri, 29 Sep 2006 01:58:34 -0700
Subject: [PATCH] __percpu_alloc_mask() has to be __always_inline in UP case

...  or we'll end up with cpu_online_map being evaluated on UP.  In
modules.  cpumask.h is very careful to avoid that, and for a very good
reason.  So should we...

PS: yes, it really triggers (on alpha).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/percpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 3835a9642f13..46ec72fa2c84 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -74,7 +74,7 @@ static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
 	return 0;
 }
 
-static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
 {
 	return kzalloc(size, gfp);
 }
-- 
cgit v1.2.3


From e04da1dfd9041e306cb33d1b40b6005c23c5b325 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Fri, 29 Sep 2006 01:58:35 -0700
Subject: [PATCH] sys_getcpu() prototype annotated

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3f0f716225ec..2d1c3d5c83ac 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -597,6 +597,6 @@ asmlinkage long sys_get_robust_list(int pid,
 				    size_t __user *len_ptr);
 asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
 				    size_t len);
-asmlinkage long sys_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *cache);
+asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache);
 
 #endif
-- 
cgit v1.2.3


From f71b2f10f56802075d67c5710cd9f1816382d720 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Fri, 29 Sep 2006 01:58:39 -0700
Subject: [PATCH] JBD: Make journal_brelse_array() static

It's always good to make symbols static when we can, and this also eliminates
the need to rename the function in jbd2

Suggested by Eric Sandeen.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Cc: Eric Sandeen <sandeen@sandeen.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/recovery.c   | 2 +-
 include/linux/jbd.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 445eed6ce5dc..11563fe2a52b 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -46,7 +46,7 @@ static int scan_revoke_records(journal_t *, struct buffer_head *,
 #ifdef __KERNEL__
 
 /* Release readahead buffers after use */
-void journal_brelse_array(struct buffer_head *b[], int n)
+static void journal_brelse_array(struct buffer_head *b[], int n)
 {
 	while (--n >= 0)
 		brelse (b[n]);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index a6d9daa38c6d..fe89444b1c6f 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -977,7 +977,6 @@ extern void	   journal_write_revoke_records(journal_t *, transaction_t *);
 extern int	journal_set_revoke(journal_t *, unsigned long, tid_t);
 extern int	journal_test_revoke(journal_t *, unsigned long, tid_t);
 extern void	journal_clear_revoke(journal_t *);
-extern void	journal_brelse_array(struct buffer_head *b[], int n);
 extern void	journal_switch_revoke_table(journal_t *journal);
 
 /*
-- 
cgit v1.2.3


From 2dcea57ae19275451a756a2d5bf96b329487b0e0 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 29 Sep 2006 01:58:41 -0700
Subject: [PATCH] convert s390 page handling macros to functions

Convert s390 page handling macros to functions.  In particular this fixes a
problem with s390's SetPageUptodate macro which uses its input parameter
twice which again can cause subtle bugs.

[akpm@osdl.org: build fix]
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/s390/appldata/appldata_base.c |  2 +-
 include/asm-s390/pgtable.h         | 84 ++++++++++++++++++--------------------
 include/linux/page-flags.h         | 11 +++--
 3 files changed, 46 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 9a8f6ff21652..2b1e6c9a6e0e 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -16,7 +16,7 @@
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/proc_fs.h>
-#include <linux/page-flags.h>
+#include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
 #include <linux/sysctl.h>
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index 83425cdefc91..ecdff13b2505 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -31,9 +31,9 @@
  * the S390 page table tree.
  */
 #ifndef __ASSEMBLY__
+#include <linux/mm_types.h>
 #include <asm/bug.h>
 #include <asm/processor.h>
-#include <linux/threads.h>
 
 struct vm_area_struct; /* forward declaration (include/linux/mm.h) */
 struct mm_struct;
@@ -597,31 +597,31 @@ ptep_establish(struct vm_area_struct *vma,
  * should therefore only be called if it is not mapped in any
  * address space.
  */
-#define page_test_and_clear_dirty(_page)				  \
-({									  \
-	struct page *__page = (_page);					  \
-	unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT);  \
-	int __skey = page_get_storage_key(__physpage);			  \
-	if (__skey & _PAGE_CHANGED)					  \
-		page_set_storage_key(__physpage, __skey & ~_PAGE_CHANGED);\
-	(__skey & _PAGE_CHANGED);					  \
-})
+static inline int page_test_and_clear_dirty(struct page *page)
+{
+	unsigned long physpage = __pa((page - mem_map) << PAGE_SHIFT);
+	int skey = page_get_storage_key(physpage);
+
+	if (skey & _PAGE_CHANGED)
+		page_set_storage_key(physpage, skey & ~_PAGE_CHANGED);
+	return skey & _PAGE_CHANGED;
+}
 
 /*
  * Test and clear referenced bit in storage key.
  */
-#define page_test_and_clear_young(page)					\
-({									\
-	struct page *__page = (page);					\
-	unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT);\
-	int __ccode;							\
-	asm volatile(							\
-		"	rrbe	0,%1\n"					\
-		"	ipm	%0\n"					\
-		"	srl	%0,28\n"				\
-		: "=d" (__ccode) : "a" (__physpage) : "cc");		\
-	(__ccode & 2);							\
-})
+static inline int page_test_and_clear_young(struct page *page)
+{
+	unsigned long physpage = __pa((page - mem_map) << PAGE_SHIFT);
+	int ccode;
+
+	asm volatile (
+		"rrbe 0,%1\n"
+		"ipm  %0\n"
+		"srl  %0,28\n"
+		: "=d" (ccode) : "a" (physpage) : "cc" );
+	return ccode & 2;
+}
 
 /*
  * Conversion functions: convert a page and protection to a page entry,
@@ -634,32 +634,28 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
 	return __pte;
 }
 
-#define mk_pte(pg, pgprot)                                                \
-({                                                                        \
-	struct page *__page = (pg);                                       \
-	pgprot_t __pgprot = (pgprot);					  \
-	unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT);  \
-	pte_t __pte = mk_pte_phys(__physpage, __pgprot);                  \
-	__pte;                                                            \
-})
+static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
+{
+	unsigned long physpage = __pa((page - mem_map) << PAGE_SHIFT);
 
-#define pfn_pte(pfn, pgprot)                                              \
-({                                                                        \
-	pgprot_t __pgprot = (pgprot);					  \
-	unsigned long __physpage = __pa((pfn) << PAGE_SHIFT);             \
-	pte_t __pte = mk_pte_phys(__physpage, __pgprot);                  \
-	__pte;                                                            \
-})
+	return mk_pte_phys(physpage, pgprot);
+}
+
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+	unsigned long physpage = __pa((pfn) << PAGE_SHIFT);
+
+	return mk_pte_phys(physpage, pgprot);
+}
 
 #ifdef __s390x__
 
-#define pfn_pmd(pfn, pgprot)                                              \
-({                                                                        \
-	pgprot_t __pgprot = (pgprot);                                     \
-	unsigned long __physpage = __pa((pfn) << PAGE_SHIFT);             \
-	pmd_t __pmd = __pmd(__physpage + pgprot_val(__pgprot));           \
-	__pmd;                                                            \
-})
+static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
+{
+	unsigned long physpage = __pa((pfn) << PAGE_SHIFT);
+
+	return __pmd(physpage + pgprot_val(pgprot));
+}
 
 #endif /* __s390x__ */
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 9d7921dd50f0..4830a3bedfb2 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -128,12 +128,11 @@
 
 #define PageUptodate(page)	test_bit(PG_uptodate, &(page)->flags)
 #ifdef CONFIG_S390
-#define SetPageUptodate(_page) \
-	do {								      \
-		struct page *__page = (_page);				      \
-		if (!test_and_set_bit(PG_uptodate, &__page->flags))	      \
-			page_test_and_clear_dirty(_page);		      \
-	} while (0)
+static inline void SetPageUptodate(struct page *page)
+{
+	if (!test_and_set_bit(PG_uptodate, &page->flags))
+		page_test_and_clear_dirty(page);
+}
 #else
 #define SetPageUptodate(page)	set_bit(PG_uptodate, &(page)->flags)
 #endif
-- 
cgit v1.2.3


From 199a9afc3dbe98c35326f1d3907ab94dae953a6e Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Fri, 29 Sep 2006 01:59:00 -0700
Subject: [PATCH] Debug variants of linked list macros

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/list.h | 15 ++++++++++
 lib/Kconfig.debug    |  9 ++++++
 lib/Makefile         |  1 +
 lib/list_debug.c     | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 102 insertions(+)
 create mode 100644 lib/list_debug.c

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index 65a5b5ceda49..a9c90287c0ff 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -39,6 +39,7 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
  * This is only for internal list manipulation where we know
  * the prev/next entries already!
  */
+#ifndef CONFIG_DEBUG_LIST
 static inline void __list_add(struct list_head *new,
 			      struct list_head *prev,
 			      struct list_head *next)
@@ -48,6 +49,11 @@ static inline void __list_add(struct list_head *new,
 	new->prev = prev;
 	prev->next = new;
 }
+#else
+extern void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next);
+#endif
 
 /**
  * list_add - add a new entry
@@ -57,10 +63,15 @@ static inline void __list_add(struct list_head *new,
  * Insert a new entry after the specified head.
  * This is good for implementing stacks.
  */
+#ifndef CONFIG_DEBUG_LIST
 static inline void list_add(struct list_head *new, struct list_head *head)
 {
 	__list_add(new, head, head->next);
 }
+#else
+extern void list_add(struct list_head *new, struct list_head *head);
+#endif
+
 
 /**
  * list_add_tail - add a new entry
@@ -153,12 +164,16 @@ static inline void __list_del(struct list_head * prev, struct list_head * next)
  * Note: list_empty on entry does not return true after this, the entry is
  * in an undefined state.
  */
+#ifndef CONFIG_DEBUG_LIST
 static inline void list_del(struct list_head *entry)
 {
 	__list_del(entry->prev, entry->next);
 	entry->next = LIST_POISON1;
 	entry->prev = LIST_POISON2;
 }
+#else
+extern void list_del(struct list_head *entry);
+#endif
 
 /**
  * list_del_rcu - deletes entry from list without re-initialization
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b0f5ca72599f..f9ae75cc0145 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -320,6 +320,15 @@ config DEBUG_VM
 
 	  If unsure, say N.
 
+config DEBUG_LIST
+	bool "Debug linked list manipulation"
+	depends on DEBUG_KERNEL
+	help
+	  Enable this to turn on extended checks in the linked-list
+	  walking routines.
+
+	  If unsure, say N.
+
 config FRAME_POINTER
 	bool "Compile the kernel with frame pointers"
 	depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390 || AVR32 || SUPERH)
diff --git a/lib/Makefile b/lib/Makefile
index ef1d37afbbb6..402762fead70 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -28,6 +28,7 @@ lib-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
 obj-$(CONFIG_PLIST) += plist.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
+obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 
 ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
   lib-y += dec_and_lock.o
diff --git a/lib/list_debug.c b/lib/list_debug.c
new file mode 100644
index 000000000000..1aae85cef92c
--- /dev/null
+++ b/lib/list_debug.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2006, Red Hat, Inc., Dave Jones
+ * Released under the General Public License (GPL).
+ *
+ * This file contains the linked list implementations for
+ * DEBUG_LIST.
+ */
+
+#include <linux/module.h>
+#include <linux/list.h>
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+
+void __list_add(struct list_head *new,
+			      struct list_head *prev,
+			      struct list_head *next)
+{
+	if (unlikely(next->prev != prev)) {
+		printk(KERN_ERR "list_add corruption. next->prev should be %p, but was %p\n",
+			prev, next->prev);
+		BUG();
+	}
+	if (unlikely(prev->next != next)) {
+		printk(KERN_ERR "list_add corruption. prev->next should be %p, but was %p\n",
+			next, prev->next);
+		BUG();
+	}
+	next->prev = new;
+	new->next = next;
+	new->prev = prev;
+	prev->next = new;
+}
+EXPORT_SYMBOL(__list_add);
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+void list_add(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head, head->next);
+}
+EXPORT_SYMBOL(list_add);
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+void list_del(struct list_head *entry)
+{
+	if (unlikely(entry->prev->next != entry)) {
+		printk(KERN_ERR "list_del corruption. prev->next should be %p, but was %p\n",
+			entry, entry->prev->next);
+		BUG();
+	}
+	if (unlikely(entry->next->prev != entry)) {
+		printk(KERN_ERR "list_del corruption. next->prev should be %p, but was %p\n",
+			entry, entry->next->prev);
+		BUG();
+	}
+	__list_del(entry->prev, entry->next);
+	entry->next = LIST_POISON1;
+	entry->prev = LIST_POISON2;
+}
+EXPORT_SYMBOL(list_del);
+
-- 
cgit v1.2.3


From 9938406ab6b2558d60c0c7200cc8e12f1ea7104a Mon Sep 17 00:00:00 2001
From: Michal Schmidt <xschmi00@stud.feec.vutbr.cz>
Date: Fri, 29 Sep 2006 01:59:03 -0700
Subject: [PATCH] Make touch_nmi_watchdog imply touch_softlockup_watchdog on
 all archs

touch_nmi_watchdog() calls touch_softlockup_watchdog() on both
architectures that implement it (i386 and x86_64).  On other architectures
it does nothing at all.  touch_nmi_watchdog() should imply
touch_softlockup_watchdog() on all architectures.  Suggested by Andi Kleen.

[heiko.carstens@de.ibm.com: s390 fix]
Signed-off-by: Michal Schmidt <xschmi00@stud.feec.vutbr.cz>
Cc: Andi Kleen <ak@muc.de>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Michal Schmidt <xschmi00@stud.feec.vutbr.cz>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-s390/irq.h | 3 ---
 include/linux/nmi.h    | 3 ++-
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-s390/irq.h b/include/asm-s390/irq.h
index bd1a721f7aa2..7da991a858f8 100644
--- a/include/asm-s390/irq.h
+++ b/include/asm-s390/irq.h
@@ -19,8 +19,5 @@ enum interruption_class {
 	NR_IRQS,
 };
 
-#define touch_nmi_watchdog() do { } while(0)
-
 #endif /* __KERNEL__ */
 #endif
-
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index c8f4d2f627d7..e16904e28c3a 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -4,6 +4,7 @@
 #ifndef LINUX_NMI_H
 #define LINUX_NMI_H
 
+#include <linux/sched.h>
 #include <asm/irq.h>
 
 /**
@@ -16,7 +17,7 @@
 #ifdef ARCH_HAS_NMI_WATCHDOG
 extern void touch_nmi_watchdog(void);
 #else
-# define touch_nmi_watchdog() do { } while(0)
+# define touch_nmi_watchdog() touch_softlockup_watchdog()
 #endif
 
 #endif
-- 
cgit v1.2.3


From 58012cd788443b9d144bd7c72260a84b6b30f45d Mon Sep 17 00:00:00 2001
From: Chris Boot <bootc@bootc.net>
Date: Fri, 29 Sep 2006 01:59:07 -0700
Subject: [PATCH] scx200_gpio export cleanups

Use EXPORT_SYMBOL_GPL for new symbols, and declare the struct in the header
file for access by other modules.

Signed-off-by: Chris Boot <bootc@bootc.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/scx200_gpio.c  | 2 +-
 include/linux/scx200_gpio.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/char/scx200_gpio.c b/drivers/char/scx200_gpio.c
index b1f88c66b2b5..99e5272e3c53 100644
--- a/drivers/char/scx200_gpio.c
+++ b/drivers/char/scx200_gpio.c
@@ -44,7 +44,7 @@ struct nsc_gpio_ops scx200_gpio_ops = {
 	.gpio_change	= scx200_gpio_change,
 	.gpio_current	= scx200_gpio_current
 };
-EXPORT_SYMBOL(scx200_gpio_ops);
+EXPORT_SYMBOL_GPL(scx200_gpio_ops);
 
 static int scx200_gpio_open(struct inode *inode, struct file *file)
 {
diff --git a/include/linux/scx200_gpio.h b/include/linux/scx200_gpio.h
index 90dd069cc145..1a82d30c4b17 100644
--- a/include/linux/scx200_gpio.h
+++ b/include/linux/scx200_gpio.h
@@ -4,6 +4,7 @@ u32 scx200_gpio_configure(unsigned index, u32 set, u32 clear);
 
 extern unsigned scx200_gpio_base;
 extern long scx200_gpio_shadow[2];
+extern struct nsc_gpio_ops scx200_gpio_ops;
 
 #define scx200_gpio_present() (scx200_gpio_base!=0)
 
-- 
cgit v1.2.3


From 6c9979185c7ef4feeb7f8d29be032b8f032a1838 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Fri, 29 Sep 2006 01:59:11 -0700
Subject: [PATCH] kthread: convert loop.c to kthread

Convert loop.c from the deprecated kernel_thread to kthread.  This patch
simplifies the code quite a bit and passes similar testing to the previous
submission on both emulated x86 and s390.

Changes since last submission:
	switched to using a rather simple loop based on
	wait_event_interruptible.

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/block/loop.c | 69 ++++++++++++++++++----------------------------------
 include/linux/loop.h |  5 ++--
 2 files changed, 26 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index c774121684d7..e87b88731adc 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -72,6 +72,7 @@
 #include <linux/completion.h>
 #include <linux/highmem.h>
 #include <linux/gfp.h>
+#include <linux/kthread.h>
 
 #include <asm/uaccess.h>
 
@@ -522,15 +523,12 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio)
 		goto out;
 	if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
 		goto out;
-	lo->lo_pending++;
 	loop_add_bio(lo, old_bio);
+	wake_up(&lo->lo_event);
 	spin_unlock_irq(&lo->lo_lock);
-	complete(&lo->lo_bh_done);
 	return 0;
 
 out:
-	if (lo->lo_pending == 0)
-		complete(&lo->lo_bh_done);
 	spin_unlock_irq(&lo->lo_lock);
 	bio_io_error(old_bio, old_bio->bi_size);
 	return 0;
@@ -570,14 +568,18 @@ static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
  * to avoid blocking in our make_request_fn. it also does loop decrypting
  * on reads for block backed loop, as that is too heavy to do from
  * b_end_io context where irqs may be disabled.
+ *
+ * Loop explanation:  loop_clr_fd() sets lo_state to Lo_rundown before
+ * calling kthread_stop().  Therefore once kthread_should_stop() is
+ * true, make_request will not place any more requests.  Therefore
+ * once kthread_should_stop() is true and lo_bio is NULL, we are
+ * done with the loop.
  */
 static int loop_thread(void *data)
 {
 	struct loop_device *lo = data;
 	struct bio *bio;
 
-	daemonize("loop%d", lo->lo_number);
-
 	/*
 	 * loop can be used in an encrypted device,
 	 * hence, it mustn't be stopped at all
@@ -587,47 +589,21 @@ static int loop_thread(void *data)
 
 	set_user_nice(current, -20);
 
-	lo->lo_state = Lo_bound;
-	lo->lo_pending = 1;
-
-	/*
-	 * complete it, we are running
-	 */
-	complete(&lo->lo_done);
+	while (!kthread_should_stop() || lo->lo_bio) {
 
-	for (;;) {
-		int pending;
+		wait_event_interruptible(lo->lo_event,
+				lo->lo_bio || kthread_should_stop());
 
-		if (wait_for_completion_interruptible(&lo->lo_bh_done))
+		if (!lo->lo_bio)
 			continue;
-
 		spin_lock_irq(&lo->lo_lock);
-
-		/*
-		 * could be completed because of tear-down, not pending work
-		 */
-		if (unlikely(!lo->lo_pending)) {
-			spin_unlock_irq(&lo->lo_lock);
-			break;
-		}
-
 		bio = loop_get_bio(lo);
-		lo->lo_pending--;
-		pending = lo->lo_pending;
 		spin_unlock_irq(&lo->lo_lock);
 
 		BUG_ON(!bio);
 		loop_handle_bio(lo, bio);
-
-		/*
-		 * upped both for pending work and tear-down, lo_pending
-		 * will hit zero then
-		 */
-		if (unlikely(!pending))
-			break;
 	}
 
-	complete(&lo->lo_done);
 	return 0;
 }
 
@@ -840,10 +816,15 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
 
 	set_blocksize(bdev, lo_blocksize);
 
-	error = kernel_thread(loop_thread, lo, CLONE_KERNEL);
-	if (error < 0)
+	lo->lo_thread = kthread_create(loop_thread, lo, "loop%d",
+						lo->lo_number);
+	if (IS_ERR(lo->lo_thread)) {
+		error = PTR_ERR(lo->lo_thread);
+		lo->lo_thread = NULL;
 		goto out_putf;
-	wait_for_completion(&lo->lo_done);
+	}
+	lo->lo_state = Lo_bound;
+	wake_up_process(lo->lo_thread);
 	return 0;
 
  out_putf:
@@ -907,12 +888,9 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
 
 	spin_lock_irq(&lo->lo_lock);
 	lo->lo_state = Lo_rundown;
-	lo->lo_pending--;
-	if (!lo->lo_pending)
-		complete(&lo->lo_bh_done);
 	spin_unlock_irq(&lo->lo_lock);
 
-	wait_for_completion(&lo->lo_done);
+	kthread_stop(lo->lo_thread);
 
 	lo->lo_backing_file = NULL;
 
@@ -925,6 +903,7 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
 	lo->lo_sizelimit = 0;
 	lo->lo_encrypt_key_size = 0;
 	lo->lo_flags = 0;
+	lo->lo_thread = NULL;
 	memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
 	memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
 	memset(lo->lo_file_name, 0, LO_NAME_SIZE);
@@ -1287,9 +1266,9 @@ static int __init loop_init(void)
 		if (!lo->lo_queue)
 			goto out_mem4;
 		mutex_init(&lo->lo_ctl_mutex);
-		init_completion(&lo->lo_done);
-		init_completion(&lo->lo_bh_done);
 		lo->lo_number = i;
+		lo->lo_thread = NULL;
+		init_waitqueue_head(&lo->lo_event);
 		spin_lock_init(&lo->lo_lock);
 		disk->major = LOOP_MAJOR;
 		disk->first_minor = i;
diff --git a/include/linux/loop.h b/include/linux/loop.h
index e76c7611d6cc..191a595055f0 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -59,10 +59,9 @@ struct loop_device {
 	struct bio 		*lo_bio;
 	struct bio		*lo_biotail;
 	int			lo_state;
-	struct completion	lo_done;
-	struct completion	lo_bh_done;
 	struct mutex		lo_ctl_mutex;
-	int			lo_pending;
+	struct task_struct	*lo_thread;
+	wait_queue_head_t	lo_event;
 
 	request_queue_t		*lo_queue;
 };
-- 
cgit v1.2.3


From db0b0ead60815155c791e8f479ee4777e7946369 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@mellanox.co.il>
Date: Fri, 29 Sep 2006 01:59:28 -0700
Subject: [PATCH] lockdep: don't pull in includes when lockdep disabled

Do not pull in various includes through lockdep.h if lockdep is disabled.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/lockdep.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index c040a8c969aa..1314ca0f29be 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -8,13 +8,13 @@
 #ifndef __LINUX_LOCKDEP_H
 #define __LINUX_LOCKDEP_H
 
+#ifdef CONFIG_LOCKDEP
+
 #include <linux/linkage.h>
 #include <linux/list.h>
 #include <linux/debug_locks.h>
 #include <linux/stacktrace.h>
 
-#ifdef CONFIG_LOCKDEP
-
 /*
  * Lock-class usage-state bits:
  */
-- 
cgit v1.2.3


From 650a898342b3fa21c392c06a2b7010fa19823efa Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 29 Sep 2006 01:59:35 -0700
Subject: [PATCH] vfs: define new lookup flag for chdir

In the "operation does permission checking" model used by fuse, chdir
permission is not checked, since there's no chdir method.

For this case set a lookup flag, which will be passed to ->permission(), so
fuse can distinguish it from permission checks for other operations.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dir.c         | 2 +-
 fs/open.c             | 3 ++-
 include/linux/namei.h | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 409ce6a7cca4..f85b2a282f13 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -776,7 +776,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
 		if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO))
 			return -EACCES;
 
-		if (nd && (nd->flags & LOOKUP_ACCESS))
+		if (nd && (nd->flags & (LOOKUP_ACCESS | LOOKUP_CHDIR)))
 			return fuse_access(inode, mask);
 		return 0;
 	}
diff --git a/fs/open.c b/fs/open.c
index 303f06d2a7b9..1574d8fe4909 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -546,7 +546,8 @@ asmlinkage long sys_chdir(const char __user * filename)
 	struct nameidata nd;
 	int error;
 
-	error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
+	error = __user_walk(filename,
+			    LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd);
 	if (error)
 		goto out;
 
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 45511a5918d3..c6470ba00668 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -54,6 +54,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_OPEN		(0x0100)
 #define LOOKUP_CREATE		(0x0200)
 #define LOOKUP_ACCESS		(0x0400)
+#define LOOKUP_CHDIR		(0x0800)
 
 extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
 extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *));
-- 
cgit v1.2.3


From 2e0c1f6ce7b816f63fea2af3e5e2cb20c66430e9 Mon Sep 17 00:00:00 2001
From: Shem Multinymous <multinymous@gmail.com>
Date: Fri, 29 Sep 2006 01:59:37 -0700
Subject: [PATCH] DMI: Decode and save OEM String information

This teaches dmi_decode() how to decode and save OEM Strings (type 11) DMI
information, which is currently discarded silently.  Existing code using
DMI is not affected.  Follows the "System Management BIOS (SMBIOS)
Specification" (http://www.dmtf.org/standards/smbios), and also the
userspace dmidecode.c code.

OEM Strings are the only safe way to identify some hardware, e.g., the
ThinkPad embedded controller used by the soon-to-be-submitted tp_smapi
driver.  This will also let us eliminate the long whitelist in the mainline
hdaps driver (in a future patch).

Signed-off-by: Shem Multinymous <multinymous@gmail.com>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/firmware/dmi_scan.c | 23 +++++++++++++++++++++++
 include/linux/dmi.h         |  3 ++-
 2 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index b9e3886d9e16..b8b596d5778d 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -123,6 +123,26 @@ static void __init dmi_save_devices(struct dmi_header *dm)
 		dev->type = *d++ & 0x7f;
 		dev->name = dmi_string(dm, *d);
 		dev->device_data = NULL;
+		list_add(&dev->list, &dmi_devices);
+	}
+}
+
+static void __init dmi_save_oem_strings_devices(struct dmi_header *dm)
+{
+	int i, count = *(u8 *)(dm + 1);
+	struct dmi_device *dev;
+
+	for (i = 1; i <= count; i++) {
+		dev = dmi_alloc(sizeof(*dev));
+		if (!dev) {
+			printk(KERN_ERR
+			   "dmi_save_oem_strings_devices: out of memory.\n");
+			break;
+		}
+
+		dev->type = DMI_DEV_TYPE_OEM_STRING;
+		dev->name = dmi_string(dm, i);
+		dev->device_data = NULL;
 
 		list_add(&dev->list, &dmi_devices);
 	}
@@ -181,6 +201,9 @@ static void __init dmi_decode(struct dmi_header *dm)
 	case 10:	/* Onboard Devices Information */
 		dmi_save_devices(dm);
 		break;
+	case 11:	/* OEM Strings */
+		dmi_save_oem_strings_devices(dm);
+		break;
 	case 38:	/* IPMI Device Information */
 		dmi_save_ipmi_device(dm);
 	}
diff --git a/include/linux/dmi.h b/include/linux/dmi.h
index b2cd2071d432..38dc403be70b 100644
--- a/include/linux/dmi.h
+++ b/include/linux/dmi.h
@@ -27,7 +27,8 @@ enum dmi_device_type {
 	DMI_DEV_TYPE_ETHERNET,
 	DMI_DEV_TYPE_TOKENRING,
 	DMI_DEV_TYPE_SOUND,
-	DMI_DEV_TYPE_IPMI = -1
+	DMI_DEV_TYPE_IPMI = -1,
+	DMI_DEV_TYPE_OEM_STRING = -2
 };
 
 struct dmi_header {
-- 
cgit v1.2.3


From 402749ea2538be9ddad981a990739b93a0178bc6 Mon Sep 17 00:00:00 2001
From: Matthias Urlichs <smurf@smurf.noris.de>
Date: Fri, 29 Sep 2006 01:59:37 -0700
Subject: [PATCH] Remove unused tty_struct field

Unused: tty_struct.max_flip_cnt

$ git grep max_flip_cnt
include/linux/tty.h:    int max_flip_cnt;
$

Cc: Paul Fulghum <paulkf@microgate.com>
Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/tty.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 04827ca65781..d1dec3d0c814 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -190,7 +190,6 @@ struct tty_struct {
 	struct tty_struct *link;
 	struct fasync_struct *fasync;
 	struct tty_bufhead buf;
-	int max_flip_cnt;
 	int alt_speed;		/* For magic substitution of 38400 bps */
 	wait_queue_head_t write_wait;
 	wait_queue_head_t read_wait;
-- 
cgit v1.2.3


From 3d5b6fccc4b900cc4267692f015ea500bad4c6bf Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 29 Sep 2006 01:59:40 -0700
Subject: [PATCH] task_struct: ifdef Missed'em V IPC

ipc/sem.c only.

$ agrep sysvsem -w -n
ipc/sem.c:912:  undo_list = current->sysvsem.undo_list;
ipc/sem.c:932:  undo_list = current->sysvsem.undo_list;
ipc/sem.c:954:  undo_list = current->sysvsem.undo_list;
ipc/sem.c:963:          current->sysvsem.undo_list = undo_list;
ipc/sem.c:1247:         tsk->sysvsem.undo_list = undo_list;
ipc/sem.c:1249:         tsk->sysvsem.undo_list = NULL;
ipc/sem.c:1271: undo_list = tsk->sysvsem.undo_list;
include/linux/sched.h:876:      struct sysv_sem sysvsem;

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9d4aa7f95bc8..27122575d902 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -886,8 +886,10 @@ struct task_struct {
 				     - initialized normally by flush_old_exec */
 /* file system info */
 	int link_count, total_link_count;
+#ifdef CONFIG_SYSVIPC
 /* ipc stuff */
 	struct sysv_sem sysvsem;
+#endif
 /* CPU-specific state of this task */
 	struct thread_struct thread;
 /* filesystem information */
-- 
cgit v1.2.3


From 6c5c934153513dc72e2d6464f39e8ef1f27c0a3e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 29 Sep 2006 01:59:40 -0700
Subject: [PATCH] ifdef blktrace debugging fields

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 block/blktrace.c       | 6 ++++--
 block/ll_rw_blk.c      | 3 +--
 include/linux/blkdev.h | 4 ++--
 include/linux/sched.h  | 3 ++-
 kernel/fork.c          | 2 ++
 5 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/block/blktrace.c b/block/blktrace.c
index 2b4ef2b89b8d..8ff33441d8a2 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -450,8 +450,10 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
  **/
 void blk_trace_shutdown(request_queue_t *q)
 {
-	blk_trace_startstop(q, 0);
-	blk_trace_remove(q);
+	if (q->blk_trace) {
+		blk_trace_startstop(q, 0);
+		blk_trace_remove(q);
+	}
 }
 
 /*
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 9c3a06bcb7ba..51dc0edf76e0 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -1847,8 +1847,7 @@ static void blk_release_queue(struct kobject *kobj)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
-	if (q->blk_trace)
-		blk_trace_shutdown(q);
+	blk_trace_shutdown(q);
 
 	kmem_cache_free(requestq_cachep, q);
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c773ee545ebd..cfde8b3ee919 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -417,9 +417,9 @@ struct request_queue
 	unsigned int		sg_timeout;
 	unsigned int		sg_reserved_size;
 	int			node;
-
+#ifdef CONFIG_BLK_DEV_IO_TRACE
 	struct blk_trace	*blk_trace;
-
+#endif
 	/*
 	 * reserved for flush operations
 	 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 27122575d902..3696f2f7126d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -784,8 +784,9 @@ struct task_struct {
 	struct prio_array *array;
 
 	unsigned short ioprio;
+#ifdef CONFIG_BLK_DEV_IO_TRACE
 	unsigned int btrace_seq;
-
+#endif
 	unsigned long sleep_avg;
 	unsigned long long timestamp, last_ran;
 	unsigned long long sched_time; /* sched_clock time spent running */
diff --git a/kernel/fork.c b/kernel/fork.c
index 802b1cf0e63f..bca6ce6d3ded 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -183,7 +183,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	/* One for us, one for whoever does the "release_task()" (usually parent) */
 	atomic_set(&tsk->usage,2);
 	atomic_set(&tsk->fs_excl, 0);
+#ifdef CONFIG_BLK_DEV_IO_TRACE
 	tsk->btrace_seq = 0;
+#endif
 	tsk->splice_pipe = NULL;
 	return tsk;
 }
-- 
cgit v1.2.3


From d6bd3a39f7c6ebad49c261c3d458df974c880758 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eike-kernel@sf-tec.de>
Date: Fri, 29 Sep 2006 01:59:48 -0700
Subject: [PATCH] Move valid_dma_direction() from x86_64 to generic code

As suggested by Muli Ben-Yehuda this function is moved to generic code as
may be useful for all archs.

[akpm@osdl.org: fix]
Signed-off-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Cc: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/pci-swiotlb.c | 3 ++-
 drivers/net/fec_8xx/fec_main.c   | 2 +-
 drivers/net/fs_enet/fs_enet.h    | 3 +--
 include/asm-x86_64/dma-mapping.h | 7 -------
 include/linux/dma-mapping.h      | 7 +++++++
 5 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
index 6a55f87ba97f..697f0aa794b9 100644
--- a/arch/x86_64/kernel/pci-swiotlb.c
+++ b/arch/x86_64/kernel/pci-swiotlb.c
@@ -3,7 +3,8 @@
 #include <linux/pci.h>
 #include <linux/cache.h>
 #include <linux/module.h>
-#include <asm/dma-mapping.h>
+#include <linux/dma-mapping.h>
+
 #include <asm/proto.h>
 #include <asm/swiotlb.h>
 #include <asm/dma.h>
diff --git a/drivers/net/fec_8xx/fec_main.c b/drivers/net/fec_8xx/fec_main.c
index 22ac2df1aeb0..e17a1449ee10 100644
--- a/drivers/net/fec_8xx/fec_main.c
+++ b/drivers/net/fec_8xx/fec_main.c
@@ -30,6 +30,7 @@
 #include <linux/mii.h>
 #include <linux/ethtool.h>
 #include <linux/bitops.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/8xx_immap.h>
 #include <asm/pgtable.h>
@@ -37,7 +38,6 @@
 #include <asm/irq.h>
 #include <asm/uaccess.h>
 #include <asm/commproc.h>
-#include <asm/dma-mapping.h>
 
 #include "fec_8xx.h"
 
diff --git a/drivers/net/fs_enet/fs_enet.h b/drivers/net/fs_enet/fs_enet.h
index 95022c005f75..92590d8fc24b 100644
--- a/drivers/net/fs_enet/fs_enet.h
+++ b/drivers/net/fs_enet/fs_enet.h
@@ -6,11 +6,10 @@
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/phy.h>
+#include <linux/dma-mapping.h>
 
 #include <linux/fs_enet_pd.h>
 
-#include <asm/dma-mapping.h>
-
 #ifdef CONFIG_CPM1
 #include <asm/commproc.h>
 
diff --git a/include/asm-x86_64/dma-mapping.h b/include/asm-x86_64/dma-mapping.h
index b6da83dcc7a6..10174b110a5c 100644
--- a/include/asm-x86_64/dma-mapping.h
+++ b/include/asm-x86_64/dma-mapping.h
@@ -55,13 +55,6 @@ extern dma_addr_t bad_dma_address;
 extern struct dma_mapping_ops* dma_ops;
 extern int iommu_merge;
 
-static inline int valid_dma_direction(int dma_direction)
-{
-	return ((dma_direction == DMA_BIDIRECTIONAL) ||
-		(dma_direction == DMA_TO_DEVICE) ||
-		(dma_direction == DMA_FROM_DEVICE));
-}
-
 static inline int dma_mapping_error(dma_addr_t dma_addr)
 {
 	if (dma_ops->mapping_error)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 635690cf3e3d..ff203c465fed 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -24,6 +24,13 @@ enum dma_data_direction {
 #define DMA_28BIT_MASK	0x000000000fffffffULL
 #define DMA_24BIT_MASK	0x0000000000ffffffULL
 
+static inline int valid_dma_direction(int dma_direction)
+{
+	return ((dma_direction == DMA_BIDIRECTIONAL) ||
+		(dma_direction == DMA_TO_DEVICE) ||
+		(dma_direction == DMA_FROM_DEVICE));
+}
+
 #include <asm/dma-mapping.h>
 
 /* Backwards compat, remove in 2.7.x */
-- 
cgit v1.2.3


From 0e51a720b9d9ea5ebf0fda39108919c6626bffa3 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 29 Sep 2006 01:59:56 -0700
Subject: [PATCH] ifdef ->quota_read, ->quota_write

All suppliers of ->quota_read, ->quota_write (I've found ext2, ext3, UFS,
reiserfs) already have them properly ifdeffed.  All callers of
->quota_read, ->quota_write are under CONFIG_QUOTA umbrella, so...

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/fs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8f74dfbb2edd..4caec6cebc42 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1143,9 +1143,10 @@ struct super_operations {
 
 	int (*show_options)(struct seq_file *, struct vfsmount *);
 	int (*show_stats)(struct seq_file *, struct vfsmount *);
-
+#ifdef CONFIG_QUOTA
 	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+#endif
 };
 
 /* Inode state bits.  Protected by inode_lock. */
-- 
cgit v1.2.3


From 068fbb315dd1e9dd3418aac39a9cfeabe39c16a6 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 29 Sep 2006 01:59:58 -0700
Subject: [PATCH] reiserfs: ifdef xattr_sem

Shrink reiserfs inode by 12 bytes for xattr non-users (me).

	-reiser_inode_cache     356     11
	+reiser_inode_cache     344     11

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/inode.c            | 4 ++--
 include/linux/reiserfs_fs_i.h  | 2 ++
 include/linux/reiserfs_xattr.h | 8 ++++++++
 3 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 8810fda0da46..78f23f406932 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1129,7 +1129,7 @@ static void init_inode(struct inode *inode, struct path *path)
 	REISERFS_I(inode)->i_jl = NULL;
 	REISERFS_I(inode)->i_acl_access = NULL;
 	REISERFS_I(inode)->i_acl_default = NULL;
-	init_rwsem(&REISERFS_I(inode)->xattr_sem);
+	reiserfs_init_xattr_rwsem(inode);
 
 	if (stat_data_v1(ih)) {
 		struct stat_data_v1 *sd =
@@ -1836,7 +1836,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
 	REISERFS_I(inode)->i_acl_access = NULL;
 	REISERFS_I(inode)->i_acl_default = NULL;
-	init_rwsem(&REISERFS_I(inode)->xattr_sem);
+	reiserfs_init_xattr_rwsem(inode);
 
 	if (old_format_only(sb))
 		make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
diff --git a/include/linux/reiserfs_fs_i.h b/include/linux/reiserfs_fs_i.h
index 149be8d9a0c9..711e7e7cafa5 100644
--- a/include/linux/reiserfs_fs_i.h
+++ b/include/linux/reiserfs_fs_i.h
@@ -55,7 +55,9 @@ struct reiserfs_inode_info {
 
 	struct posix_acl *i_acl_access;
 	struct posix_acl *i_acl_default;
+#ifdef CONFIG_REISERFS_FS_XATTR
 	struct rw_semaphore xattr_sem;
+#endif
 	struct inode vfs_inode;
 };
 
diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h
index 5e961035c725..966c35851b2e 100644
--- a/include/linux/reiserfs_xattr.h
+++ b/include/linux/reiserfs_xattr.h
@@ -97,6 +97,11 @@ static inline void reiserfs_mark_inode_private(struct inode *inode)
 	inode->i_flags |= S_PRIVATE;
 }
 
+static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
+{
+	init_rwsem(&REISERFS_I(inode)->xattr_sem);
+}
+
 #else
 
 #define is_reiserfs_priv_object(inode) 0
@@ -129,6 +134,9 @@ static inline int reiserfs_xattr_init(struct super_block *sb, int mount_flags)
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL);	/* to be sure */
 	return 0;
 };
+static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
+{
+}
 #endif
 
 #endif				/* __KERNEL__ */
-- 
cgit v1.2.3


From cfe14677f286c9be5d683b88214def8f4b8a6f24 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 29 Sep 2006 02:00:00 -0700
Subject: [PATCH] reiserfs: ifdef ACL stuff from inode

Shrink reiserfs inode more (by 8 bytes) for ACL non-users:

	-reiser_inode_cache     344     11
	+reiser_inode_cache     336     11

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/inode.c           | 10 ++++++----
 fs/reiserfs/super.c           |  6 ++++++
 include/linux/reiserfs_acl.h  | 17 +++++++++++++++++
 include/linux/reiserfs_fs_i.h |  3 ++-
 4 files changed, 31 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 78f23f406932..7e5a2f5ebeb0 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1127,8 +1127,8 @@ static void init_inode(struct inode *inode, struct path *path)
 	REISERFS_I(inode)->i_prealloc_count = 0;
 	REISERFS_I(inode)->i_trans_id = 0;
 	REISERFS_I(inode)->i_jl = NULL;
-	REISERFS_I(inode)->i_acl_access = NULL;
-	REISERFS_I(inode)->i_acl_default = NULL;
+	reiserfs_init_acl_access(inode);
+	reiserfs_init_acl_default(inode);
 	reiserfs_init_xattr_rwsem(inode);
 
 	if (stat_data_v1(ih)) {
@@ -1834,8 +1834,8 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	REISERFS_I(inode)->i_attrs =
 	    REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
 	sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
-	REISERFS_I(inode)->i_acl_access = NULL;
-	REISERFS_I(inode)->i_acl_default = NULL;
+	reiserfs_init_acl_access(inode);
+	reiserfs_init_acl_default(inode);
 	reiserfs_init_xattr_rwsem(inode);
 
 	if (old_format_only(sb))
@@ -1974,11 +1974,13 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	 * iput doesn't deadlock in reiserfs_delete_xattrs. The locking
 	 * code really needs to be reworked, but this will take care of it
 	 * for now. -jeffm */
+#ifdef CONFIG_REISERFS_FS_POSIX_ACL
 	if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) {
 		reiserfs_write_unlock_xattrs(dir->i_sb);
 		iput(inode);
 		reiserfs_write_lock_xattrs(dir->i_sb);
 	} else
+#endif
 		iput(inode);
 	return err;
 }
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b40d4d64d598..80fc3b32802f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -510,8 +510,10 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
 	    SLAB_CTOR_CONSTRUCTOR) {
 		INIT_LIST_HEAD(&ei->i_prealloc_list);
 		inode_init_once(&ei->vfs_inode);
+#ifdef CONFIG_REISERFS_FS_POSIX_ACL
 		ei->i_acl_access = NULL;
 		ei->i_acl_default = NULL;
+#endif
 	}
 }
 
@@ -560,6 +562,7 @@ static void reiserfs_dirty_inode(struct inode *inode)
 	reiserfs_write_unlock(inode->i_sb);
 }
 
+#ifdef CONFIG_REISERFS_FS_POSIX_ACL
 static void reiserfs_clear_inode(struct inode *inode)
 {
 	struct posix_acl *acl;
@@ -574,6 +577,9 @@ static void reiserfs_clear_inode(struct inode *inode)
 		posix_acl_release(acl);
 	REISERFS_I(inode)->i_acl_default = NULL;
 }
+#else
+#define reiserfs_clear_inode NULL
+#endif
 
 #ifdef CONFIG_QUOTA
 static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
diff --git a/include/linux/reiserfs_acl.h b/include/linux/reiserfs_acl.h
index 806ec5b06707..fe00f781a622 100644
--- a/include/linux/reiserfs_acl.h
+++ b/include/linux/reiserfs_acl.h
@@ -56,6 +56,16 @@ extern int reiserfs_xattr_posix_acl_init(void) __init;
 extern int reiserfs_xattr_posix_acl_exit(void);
 extern struct reiserfs_xattr_handler posix_acl_default_handler;
 extern struct reiserfs_xattr_handler posix_acl_access_handler;
+
+static inline void reiserfs_init_acl_access(struct inode *inode)
+{
+	REISERFS_I(inode)->i_acl_access = NULL;
+}
+
+static inline void reiserfs_init_acl_default(struct inode *inode)
+{
+	REISERFS_I(inode)->i_acl_default = NULL;
+}
 #else
 
 #define reiserfs_cache_default_acl(inode) 0
@@ -87,4 +97,11 @@ reiserfs_inherit_default_acl(const struct inode *dir, struct dentry *dentry,
 	return 0;
 }
 
+static inline void reiserfs_init_acl_access(struct inode *inode)
+{
+}
+
+static inline void reiserfs_init_acl_default(struct inode *inode)
+{
+}
 #endif
diff --git a/include/linux/reiserfs_fs_i.h b/include/linux/reiserfs_fs_i.h
index 711e7e7cafa5..5b3b297aa2c5 100644
--- a/include/linux/reiserfs_fs_i.h
+++ b/include/linux/reiserfs_fs_i.h
@@ -52,9 +52,10 @@ struct reiserfs_inode_info {
 	 ** flushed */
 	unsigned long i_trans_id;
 	struct reiserfs_journal_list *i_jl;
-
+#ifdef CONFIG_REISERFS_FS_POSIX_ACL
 	struct posix_acl *i_acl_access;
 	struct posix_acl *i_acl_default;
+#endif
 #ifdef CONFIG_REISERFS_FS_XATTR
 	struct rw_semaphore xattr_sem;
 #endif
-- 
cgit v1.2.3


From 50462062a02226a698a211d5bd535376c89b8603 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 29 Sep 2006 02:00:01 -0700
Subject: [PATCH] fs.h: ifdef security fields

[assuming BSD security levels are deleted]
The only user of i_security, f_security, s_security fields is SELinux,
however, quite a few security modules are trying to get into kernel.
So, wrap them under CONFIG_SECURITY. Adding config option for each
security field is likely an overkill.

Following Stephen Smalley's suggestion, i_security initialization is
moved to security_inode_alloc() to not clutter core code with ifdefs
and make alloc_inode() codepath tiny little bit smaller and faster.

The user of (highly greppable) struct fown_struct::security field is
still to be found. I've checked every "fown_struct" and every "f_owner"
occurence. Additionally it's removal doesn't break i386 allmodconfig
build.

struct inode, struct file, struct super_block, struct fown_struct
become smaller.

P.S. Combined with two reiserfs inode shrinking patches sent to
linux-fsdevel, I can finally suck 12 reiserfs inodes into one page.

		/proc/slabinfo

	-ext2_inode_cache	388	10
	+ext2_inode_cache	384	10
	-inode_cache		280	14
	+inode_cache		276	14
	-proc_inode_cache	296	13
	+proc_inode_cache	292	13
	-reiser_inode_cache	336	11
	+reiser_inode_cache	332	12 <=
	-shmem_inode_cache	372	10
	+shmem_inode_cache	368	10

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inode.c               | 1 -
 include/linux/fs.h       | 8 ++++++--
 include/linux/security.h | 1 +
 3 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index f5c04dd9ae8a..abf77471e6c4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -133,7 +133,6 @@ static struct inode *alloc_inode(struct super_block *sb)
 		inode->i_bdev = NULL;
 		inode->i_cdev = NULL;
 		inode->i_rdev = 0;
-		inode->i_security = NULL;
 		inode->dirtied_when = 0;
 		if (security_inode_alloc(inode)) {
 			if (inode->i_sb->s_op->destroy_inode)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4caec6cebc42..6eafbe309483 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -553,7 +553,9 @@ struct inode {
 	unsigned int		i_flags;
 
 	atomic_t		i_writecount;
+#ifdef CONFIG_SECURITY
 	void			*i_security;
+#endif
 	void			*i_private; /* fs or device private pointer */
 #ifdef __NEED_I_SIZE_ORDERED
 	seqcount_t		i_size_seqcount;
@@ -645,7 +647,6 @@ struct fown_struct {
 	rwlock_t lock;          /* protects pid, uid, euid fields */
 	int pid;		/* pid or -pgrp where SIGIO should be sent */
 	uid_t uid, euid;	/* uid/euid of process setting the owner */
-	void *security;
 	int signum;		/* posix.1b rt signal to be delivered on IO */
 };
 
@@ -688,8 +689,9 @@ struct file {
 	struct file_ra_state	f_ra;
 
 	unsigned long		f_version;
+#ifdef CONFIG_SECURITY
 	void			*f_security;
-
+#endif
 	/* needed for tty driver, and maybe others */
 	void			*private_data;
 
@@ -877,7 +879,9 @@ struct super_block {
 	int			s_syncing;
 	int			s_need_sync_fs;
 	atomic_t		s_active;
+#ifdef CONFIG_SECURITY
 	void                    *s_security;
+#endif
 	struct xattr_handler	**s_xattr;
 
 	struct list_head	s_inodes;	/* all inodes */
diff --git a/include/linux/security.h b/include/linux/security.h
index 9f56fb8a4a6c..9b5fea81f55e 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1595,6 +1595,7 @@ static inline void security_sb_post_pivotroot (struct nameidata *old_nd,
 
 static inline int security_inode_alloc (struct inode *inode)
 {
+	inode->i_security = NULL;
 	return security_ops->inode_alloc_security (inode);
 }
 
-- 
cgit v1.2.3


From ae78bf9c4f5fde3c67e2829505f195d7347ce3e4 Mon Sep 17 00:00:00 2001
From: Chris Mason <mason@suse.com>
Date: Fri, 29 Sep 2006 02:00:03 -0700
Subject: [PATCH] add -o flush for fat

Fat is commonly used on removable media.  Mounting with -o flush tells the
FS to write things to disk as quickly as possible.  It is like -o sync, but
much faster (and not as safe).

Signed-off-by: Chris Mason <mason@suse.com>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fat/file.c            | 13 +++++++++++
 fs/fat/inode.c           | 59 ++++++++++++++++++++++++++++++++++++++++++++++--
 fs/msdos/namei.c         | 11 ++++++++-
 include/linux/msdos_fs.h |  3 +++
 4 files changed, 83 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fat/file.c b/fs/fat/file.c
index 1ee25232e6af..d50fc47169c1 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -13,6 +13,7 @@
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
+#include <linux/blkdev.h>
 
 int fat_generic_ioctl(struct inode *inode, struct file *filp,
 		      unsigned int cmd, unsigned long arg)
@@ -112,6 +113,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
 	}
 }
 
+static int fat_file_release(struct inode *inode, struct file *filp)
+{
+	if ((filp->f_mode & FMODE_WRITE) &&
+	     MSDOS_SB(inode->i_sb)->options.flush) {
+		fat_flush_inodes(inode->i_sb, inode, NULL);
+		blk_congestion_wait(WRITE, HZ/10);
+	}
+	return 0;
+}
+
 const struct file_operations fat_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
@@ -121,6 +132,7 @@ const struct file_operations fat_file_operations = {
 	.aio_read	= generic_file_aio_read,
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
+	.release	= fat_file_release,
 	.ioctl		= fat_generic_ioctl,
 	.fsync		= file_fsync,
 	.sendfile	= generic_file_sendfile,
@@ -289,6 +301,7 @@ void fat_truncate(struct inode *inode)
 	lock_kernel();
 	fat_free(inode, nr_clusters);
 	unlock_kernel();
+	fat_flush_inodes(inode->i_sb, inode, NULL);
 }
 
 struct inode_operations fat_file_inode_operations = {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index ab96ae823753..045738032a83 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -24,6 +24,7 @@
 #include <linux/vfs.h>
 #include <linux/parser.h>
 #include <linux/uio.h>
+#include <linux/writeback.h>
 #include <asm/unaligned.h>
 
 #ifndef CONFIG_FAT_DEFAULT_IOCHARSET
@@ -853,7 +854,7 @@ enum {
 	Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
 	Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
 	Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
-	Opt_obsolate, Opt_err,
+	Opt_obsolate, Opt_flush, Opt_err,
 };
 
 static match_table_t fat_tokens = {
@@ -885,7 +886,8 @@ static match_table_t fat_tokens = {
 	{Opt_obsolate, "cvf_format=%20s"},
 	{Opt_obsolate, "cvf_options=%100s"},
 	{Opt_obsolate, "posix"},
-	{Opt_err, NULL}
+	{Opt_flush, "flush"},
+	{Opt_err, NULL},
 };
 static match_table_t msdos_tokens = {
 	{Opt_nodots, "nodots"},
@@ -1026,6 +1028,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
 				return 0;
 			opts->codepage = option;
 			break;
+		case Opt_flush:
+			opts->flush = 1;
+			break;
 
 		/* msdos specific */
 		case Opt_dots:
@@ -1425,6 +1430,56 @@ out_fail:
 
 EXPORT_SYMBOL_GPL(fat_fill_super);
 
+/*
+ * helper function for fat_flush_inodes.  This writes both the inode
+ * and the file data blocks, waiting for in flight data blocks before
+ * the start of the call.  It does not wait for any io started
+ * during the call
+ */
+static int writeback_inode(struct inode *inode)
+{
+
+	int ret;
+	struct address_space *mapping = inode->i_mapping;
+	struct writeback_control wbc = {
+	       .sync_mode = WB_SYNC_NONE,
+	      .nr_to_write = 0,
+	};
+	/* if we used WB_SYNC_ALL, sync_inode waits for the io for the
+	* inode to finish.  So WB_SYNC_NONE is sent down to sync_inode
+	* and filemap_fdatawrite is used for the data blocks
+	*/
+	ret = sync_inode(inode, &wbc);
+	if (!ret)
+	       ret = filemap_fdatawrite(mapping);
+	return ret;
+}
+
+/*
+ * write data and metadata corresponding to i1 and i2.  The io is
+ * started but we do not wait for any of it to finish.
+ *
+ * filemap_flush is used for the block device, so if there is a dirty
+ * page for a block already in flight, we will not wait and start the
+ * io over again
+ */
+int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2)
+{
+	int ret = 0;
+	if (!MSDOS_SB(sb)->options.flush)
+		return 0;
+	if (i1)
+		ret = writeback_inode(i1);
+	if (!ret && i2)
+		ret = writeback_inode(i2);
+	if (!ret && sb) {
+		struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
+		ret = filemap_flush(mapping);
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(fat_flush_inodes);
+
 static int __init init_fat_fs(void)
 {
 	int err;
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 9e44158a7540..d220165d4918 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -280,7 +280,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode,
 			struct nameidata *nd)
 {
 	struct super_block *sb = dir->i_sb;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	struct fat_slot_info sinfo;
 	struct timespec ts;
 	unsigned char msdos_name[MSDOS_NAME];
@@ -316,6 +316,8 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode,
 	d_instantiate(dentry, inode);
 out:
 	unlock_kernel();
+	if (!err)
+		err = fat_flush_inodes(sb, dir, inode);
 	return err;
 }
 
@@ -348,6 +350,8 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry)
 	fat_detach(inode);
 out:
 	unlock_kernel();
+	if (!err)
+		err = fat_flush_inodes(inode->i_sb, dir, inode);
 
 	return err;
 }
@@ -401,6 +405,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	d_instantiate(dentry, inode);
 
 	unlock_kernel();
+	fat_flush_inodes(sb, dir, inode);
 	return 0;
 
 out_free:
@@ -430,6 +435,8 @@ static int msdos_unlink(struct inode *dir, struct dentry *dentry)
 	fat_detach(inode);
 out:
 	unlock_kernel();
+	if (!err)
+		err = fat_flush_inodes(inode->i_sb, dir, inode);
 
 	return err;
 }
@@ -635,6 +642,8 @@ static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry,
 			      new_dir, new_msdos_name, new_dentry, is_hid);
 out:
 	unlock_kernel();
+	if (!err)
+		err = fat_flush_inodes(old_dir->i_sb, old_dir, new_dir);
 	return err;
 }
 
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index bae62d62dc3e..ce6c85815cbd 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -204,6 +204,7 @@ struct fat_mount_options {
 		 unicode_xlate:1, /* create escape sequences for unhandled Unicode */
 		 numtail:1,       /* Does first alias have a numeric '~1' type tail? */
 		 atari:1,         /* Use Atari GEMDOS variation of MS-DOS fs */
+		 flush:1,	  /* write things quickly */
 		 nocase:1;	  /* Does this need case conversion? 0=need case conversion*/
 };
 
@@ -412,6 +413,8 @@ extern int fat_sync_inode(struct inode *inode);
 extern int fat_fill_super(struct super_block *sb, void *data, int silent,
 			struct inode_operations *fs_dir_inode_ops, int isvfat);
 
+extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
+		            struct inode *i2);
 /* fat/misc.c */
 extern void fat_fs_panic(struct super_block *s, const char *fmt, ...);
 extern void fat_clusters_flush(struct super_block *sb);
-- 
cgit v1.2.3


From ca9bda00b4aafc42cd3d1b9d32934463e2993b4c Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Fri, 29 Sep 2006 02:00:03 -0700
Subject: [PATCH] tty locking on resize

The current kernel serializes console resizes but does not serialize the
resize against the tty structure updates.  This means that while two
parallel resizes cannot mess up the console you can get incorrect results
reported.

Secondly while doing this I added vc_lock_resize() to lock and resize the
console.  This leaves all knowledge of the console_sem in the vt/console
driver and kicks it out of the tty layer, which is good

Thirdly while doing this I decided I couldn't stand "disallocate" any
longer so I switched it to "deallocate".

Signed-off-by: Alan Cox <alan@redhat.com>
Cc: Paul Fulghum <paulkf@microgate.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/selection.c |  2 +-
 drivers/char/tty_io.c    | 29 +++++++++++++++--------------
 drivers/char/vt.c        | 12 +++++++++++-
 drivers/char/vt_ioctl.c  | 17 +++++++----------
 include/linux/vt_kern.h  |  3 ++-
 5 files changed, 36 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/selection.c b/drivers/char/selection.c
index 71093a9fc462..74cff839c857 100644
--- a/drivers/char/selection.c
+++ b/drivers/char/selection.c
@@ -33,7 +33,7 @@ extern void poke_blanked_console(void);
 
 /* Variables for selection control. */
 /* Use a dynamic buffer, instead of static (Dec 1994) */
-struct vc_data *sel_cons;		/* must not be disallocated */
+struct vc_data *sel_cons;		/* must not be deallocated */
 static volatile int sel_start = -1; 	/* cleared by clear_selection */
 static int sel_end;
 static int sel_buffer_lth;
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index eb881cfa53e0..2a1e95b0f282 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2770,12 +2770,11 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user * arg)
  *	actually has driver level meaning and triggers a VC resize.
  *
  *	Locking:
- *		The console_sem is used to ensure we do not try and resize
- *	the console twice at once.
- *	FIXME: Two racing size sets may leave the console and kernel
- *		parameters disagreeing. Is this exploitable ?
- *	FIXME: Random values racing a window size get is wrong
- *	should lock here against that
+ *		Called function use the console_sem is used to ensure we do
+ *	not try and resize the console twice at once.
+ *		The tty->termios_sem is used to ensure we don't double
+ *	resize and get confused. Lock order - tty->termios.sem before
+ *	console sem
  */
 
 static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
@@ -2785,17 +2784,17 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
 
 	if (copy_from_user(&tmp_ws, arg, sizeof(*arg)))
 		return -EFAULT;
+
+	down(&tty->termios_sem);
 	if (!memcmp(&tmp_ws, &tty->winsize, sizeof(*arg)))
-		return 0;
+		goto done;
+
 #ifdef CONFIG_VT
 	if (tty->driver->type == TTY_DRIVER_TYPE_CONSOLE) {
-		int rc;
-
-		acquire_console_sem();
-		rc = vc_resize(tty->driver_data, tmp_ws.ws_col, tmp_ws.ws_row);
-		release_console_sem();
-		if (rc)
-			return -ENXIO;
+		if (vc_lock_resize(tty->driver_data, tmp_ws.ws_col, tmp_ws.ws_row)) {
+			up(&tty->termios_sem);
+ 			return -ENXIO;
+		}
 	}
 #endif
 	if (tty->pgrp > 0)
@@ -2804,6 +2803,8 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
 		kill_pg(real_tty->pgrp, SIGWINCH, 1);
 	tty->winsize = tmp_ws;
 	real_tty->winsize = tmp_ws;
+done:
+	up(&tty->termios_sem);
 	return 0;
 }
 
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 0fca83ededff..b49f03375439 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -885,8 +885,17 @@ int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines)
 	return err;
 }
 
+int vc_lock_resize(struct vc_data *vc, unsigned int cols, unsigned int lines)
+{
+	int rc;
+
+	acquire_console_sem();
+	rc = vc_resize(vc, cols, lines);
+	release_console_sem();
+	return rc;
+}
 
-void vc_disallocate(unsigned int currcons)
+void vc_deallocate(unsigned int currcons)
 {
 	WARN_CONSOLE_UNLOCKED();
 
@@ -3790,6 +3799,7 @@ EXPORT_SYMBOL(default_blu);
 EXPORT_SYMBOL(update_region);
 EXPORT_SYMBOL(redraw_screen);
 EXPORT_SYMBOL(vc_resize);
+EXPORT_SYMBOL(vc_lock_resize);
 EXPORT_SYMBOL(fg_console);
 EXPORT_SYMBOL(console_blank_hook);
 EXPORT_SYMBOL(console_blanked);
diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c
index a5628a8b6620..a53e382cc107 100644
--- a/drivers/char/vt_ioctl.c
+++ b/drivers/char/vt_ioctl.c
@@ -96,7 +96,7 @@ do_kdsk_ioctl(int cmd, struct kbentry __user *user_kbe, int perm, struct kbd_str
 		if (!perm)
 			return -EPERM;
 		if (!i && v == K_NOSUCHMAP) {
-			/* disallocate map */
+			/* deallocate map */
 			key_map = key_maps[s];
 			if (s && key_map) {
 			    key_maps[s] = NULL;
@@ -819,20 +819,20 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
 		if (arg > MAX_NR_CONSOLES)
 			return -ENXIO;
 		if (arg == 0) {
-		    /* disallocate all unused consoles, but leave 0 */
+		    /* deallocate all unused consoles, but leave 0 */
 			acquire_console_sem();
 			for (i=1; i<MAX_NR_CONSOLES; i++)
 				if (! VT_BUSY(i))
-					vc_disallocate(i);
+					vc_deallocate(i);
 			release_console_sem();
 		} else {
-			/* disallocate a single console, if possible */
+			/* deallocate a single console, if possible */
 			arg--;
 			if (VT_BUSY(arg))
 				return -EBUSY;
 			if (arg) {			      /* leave 0 */
 				acquire_console_sem();
-				vc_disallocate(arg);
+				vc_deallocate(arg);
 				release_console_sem();
 			}
 		}
@@ -847,11 +847,8 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
 		if (get_user(ll, &vtsizes->v_rows) ||
 		    get_user(cc, &vtsizes->v_cols))
 			return -EFAULT;
-		for (i = 0; i < MAX_NR_CONSOLES; i++) {
-			acquire_console_sem();
-			vc_resize(vc_cons[i].d, cc, ll);
-			release_console_sem();
-		}
+		for (i = 0; i < MAX_NR_CONSOLES; i++)
+			vc_lock_resize(vc_cons[i].d, cc, ll);
 		return 0;
 	}
 
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index 918a29763aea..1009d3fe1fc2 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -33,7 +33,8 @@ extern int fg_console, last_console, want_console;
 int vc_allocate(unsigned int console);
 int vc_cons_allocated(unsigned int console);
 int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines);
-void vc_disallocate(unsigned int console);
+int vc_lock_resize(struct vc_data *vc, unsigned int cols, unsigned int lines);
+void vc_deallocate(unsigned int console);
 void reset_palette(struct vc_data *vc);
 void do_blank_screen(int entering_gfx);
 void do_unblank_screen(int leaving_gfx);
-- 
cgit v1.2.3


From 3b9b8ab65d8eed784b9164d03807cb2bda7b5cd6 Mon Sep 17 00:00:00 2001
From: Kirill Korotaev <dev@sw.ru>
Date: Fri, 29 Sep 2006 02:00:05 -0700
Subject: [PATCH] Fix unserialized task->files changing

Fixed race on put_files_struct on exec with proc.  Restoring files on
current on error path may lead to proc having a pointer to already kfree-d
files_struct.

->files changing at exit.c and khtread.c are safe as exit_files() makes all
things under lock.

Found during OpenVZ stress testing.

[akpm@osdl.org: add export]
Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Kirill Korotaev <dev@openvz.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/binfmt_elf.c      |  6 ++----
 fs/binfmt_misc.c     |  6 ++----
 fs/exec.c            |  3 +--
 include/linux/file.h |  1 +
 kernel/exit.c        | 12 ++++++++++++
 5 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index dfd8cfb7fb5d..bb43da5cde5c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1038,10 +1038,8 @@ out_free_interp:
 out_free_file:
 	sys_close(elf_exec_fileno);
 out_free_fh:
-	if (files) {
-		put_files_struct(current->files);
-		current->files = files;
-	}
+	if (files)
+		reset_files_struct(current, files);
 out_free_ph:
 	kfree(elf_phdata);
 	goto out;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 66ba137f8661..1713c48fef54 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -215,10 +215,8 @@ _error:
 	bprm->interp_flags = 0;
 	bprm->interp_data = 0;
 _unshare:
-	if (files) {
-		put_files_struct(current->files);
-		current->files = files;
-	}
+	if (files)
+		reset_files_struct(current, files);
 	goto _ret;
 }
 
diff --git a/fs/exec.c b/fs/exec.c
index 97df6e0aeaee..a8efe35176b0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -898,8 +898,7 @@ int flush_old_exec(struct linux_binprm * bprm)
 	return 0;
 
 mmap_failed:
-	put_files_struct(current->files);
-	current->files = files;
+	reset_files_struct(current, files);
 out:
 	return retval;
 }
diff --git a/include/linux/file.h b/include/linux/file.h
index 9f7c2513866f..74183e6f7f45 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -112,5 +112,6 @@ struct task_struct;
 
 struct files_struct *get_files_struct(struct task_struct *);
 void FASTCALL(put_files_struct(struct files_struct *fs));
+void reset_files_struct(struct task_struct *, struct files_struct *);
 
 #endif /* __LINUX_FILE_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index d891883420f7..4b6fb054b25d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -487,6 +487,18 @@ void fastcall put_files_struct(struct files_struct *files)
 
 EXPORT_SYMBOL(put_files_struct);
 
+void reset_files_struct(struct task_struct *tsk, struct files_struct *files)
+{
+	struct files_struct *old;
+
+	old = tsk->files;
+	task_lock(tsk);
+	tsk->files = files;
+	task_unlock(tsk);
+	put_files_struct(old);
+}
+EXPORT_SYMBOL(reset_files_struct);
+
 static inline void __exit_files(struct task_struct *tsk)
 {
 	struct files_struct * files = tsk->files;
-- 
cgit v1.2.3


From f400e198b2ed26ce55b22a1412ded0896e7516ac Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Date: Fri, 29 Sep 2006 02:00:07 -0700
Subject: [PATCH] pidspace: is_init()

This is an updated version of Eric Biederman's is_init() patch.
(http://lkml.org/lkml/2006/2/6/280).  It applies cleanly to 2.6.18-rc3 and
replaces a few more instances of ->pid == 1 with is_init().

Further, is_init() checks pid and thus removes dependency on Eric's other
patches for now.

Eric's original description:

	There are a lot of places in the kernel where we test for init
	because we give it special properties.  Most  significantly init
	must not die.  This results in code all over the kernel test
	->pid == 1.

	Introduce is_init to capture this case.

	With multiple pid spaces for all of the cases affected we are
	looking for only the first process on the system, not some other
	process that has pid == 1.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: <lxc-devel@lists.sourceforge.net>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/alpha/mm/fault.c                |  2 +-
 arch/arm/mm/fault.c                  |  2 +-
 arch/arm26/mm/fault.c                |  2 +-
 arch/i386/lib/usercopy.c             |  2 +-
 arch/i386/mm/fault.c                 |  2 +-
 arch/ia64/mm/fault.c                 |  2 +-
 arch/m32r/mm/fault.c                 |  2 +-
 arch/m68k/mm/fault.c                 |  2 +-
 arch/mips/mm/fault.c                 |  2 +-
 arch/powerpc/mm/fault.c              |  2 +-
 arch/powerpc/platforms/pseries/ras.c |  2 +-
 arch/ppc/kernel/traps.c              |  2 +-
 arch/ppc/mm/fault.c                  |  2 +-
 arch/s390/mm/fault.c                 |  2 +-
 arch/sh/mm/fault.c                   |  2 +-
 arch/sh64/mm/fault.c                 |  6 +++---
 arch/um/kernel/trap.c                |  2 +-
 arch/x86_64/mm/fault.c               |  4 ++--
 arch/xtensa/mm/fault.c               |  2 +-
 drivers/char/sysrq.c                 |  2 +-
 include/linux/sched.h                | 10 ++++++++++
 kernel/capability.c                  |  2 +-
 kernel/cpuset.c                      |  2 +-
 kernel/exit.c                        |  2 +-
 kernel/kexec.c                       |  2 +-
 kernel/ptrace.c                      |  1 +
 kernel/sysctl.c                      |  2 +-
 mm/oom_kill.c                        |  2 +-
 security/commoncap.c                 |  2 +-
 29 files changed, 41 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 622dabd84680..8871529a34e2 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -193,7 +193,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 	/* We ran out of memory, or some other thing happened to us that
 	   made us unable to handle the page fault gracefully.  */
  out_of_memory:
-	if (current->pid == 1) {
+	if (is_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index a5b33ff3924e..5e658a874498 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -198,7 +198,7 @@ survive:
 		return fault;
 	}
 
-	if (tsk->pid != 1)
+	if (!is_init(tsk))
 		goto out;
 
 	/*
diff --git a/arch/arm26/mm/fault.c b/arch/arm26/mm/fault.c
index a7c4cc922095..a1f6d8a9cc32 100644
--- a/arch/arm26/mm/fault.c
+++ b/arch/arm26/mm/fault.c
@@ -185,7 +185,7 @@ survive:
 	}
 
 	fault = -3; /* out of memory */
-	if (tsk->pid != 1)
+	if (!is_init(tsk))
 		goto out;
 
 	/*
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
index efc7e7d5f4d0..08502fc6d0cb 100644
--- a/arch/i386/lib/usercopy.c
+++ b/arch/i386/lib/usercopy.c
@@ -739,7 +739,7 @@ survive:
 			retval = get_user_pages(current, current->mm,
 					(unsigned long )to, 1, 1, 0, &pg, NULL);
 
-			if (retval == -ENOMEM && current->pid == 1) {
+			if (retval == -ENOMEM && is_init(current)) {
 				up_read(&current->mm->mmap_sem);
 				blk_congestion_wait(WRITE, HZ/50);
 				goto survive;
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index 50d8617391dd..2581575786c1 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -589,7 +589,7 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (tsk->pid == 1) {
+	if (is_init(tsk)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index d8b1b4ac7f26..59f3ab937615 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -280,7 +280,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 
   out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
+	if (is_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c
index dc18a33eefef..8d5f551b5754 100644
--- a/arch/m32r/mm/fault.c
+++ b/arch/m32r/mm/fault.c
@@ -299,7 +299,7 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (tsk->pid == 1) {
+	if (is_init(tsk)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 5e2d87c10c87..911f2ce3f53e 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -181,7 +181,7 @@ good_area:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
+	if (is_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index a4f8c45c4e8e..8423d8590779 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -171,7 +171,7 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (tsk->pid == 1) {
+	if (is_init(tsk)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 77953f41d754..e8fa50624b70 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -386,7 +386,7 @@ bad_area_nosemaphore:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
+	if (is_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 903115d67fdc..311ed1993fc0 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -337,7 +337,7 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err)
 		   err->disposition == RTAS_DISP_NOT_RECOVERED &&
 		   err->target == RTAS_TARGET_MEMORY &&
 		   err->type == RTAS_TYPE_ECC_UNCORR &&
-		   !(current->pid == 0 || current->pid == 1)) {
+		   !(current->pid == 0 || is_init(current))) {
 		/* Kill off a user process with an ECC error */
 		printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
 		       current->pid);
diff --git a/arch/ppc/kernel/traps.c b/arch/ppc/kernel/traps.c
index d7a433049b48..aafc8e8893d1 100644
--- a/arch/ppc/kernel/traps.c
+++ b/arch/ppc/kernel/traps.c
@@ -119,7 +119,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
 	 * generate the same exception over and over again and we get
 	 * nowhere.  Better to kill it and let the kernel panic.
 	 */
-	if (current->pid == 1) {
+	if (is_init(current)) {
 		__sighandler_t handler;
 
 		spin_lock_irq(&current->sighand->siglock);
diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c
index bc776beb3136..465f451f3bc3 100644
--- a/arch/ppc/mm/fault.c
+++ b/arch/ppc/mm/fault.c
@@ -291,7 +291,7 @@ bad_area:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
+	if (is_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index f2b9a84dc2bf..9c3c19fe62fc 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -353,7 +353,7 @@ no_context:
 */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (tsk->pid == 1) {
+	if (is_init(tsk)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 507f28914706..68663b8f99ae 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -149,7 +149,7 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
+	if (is_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/sh64/mm/fault.c b/arch/sh64/mm/fault.c
index f08d0eaf6497..8e2f6c28b739 100644
--- a/arch/sh64/mm/fault.c
+++ b/arch/sh64/mm/fault.c
@@ -277,7 +277,7 @@ bad_area:
 			show_regs(regs);
 #endif
 		}
-		if (tsk->pid == 1) {
+		if (is_init(tsk)) {
 			panic("INIT had user mode bad_area\n");
 		}
 		tsk->thread.address = address;
@@ -319,14 +319,14 @@ no_context:
  * us unable to handle the page fault gracefully.
  */
 out_of_memory:
-	if (current->pid == 1) {
+	if (is_init(current)) {
 		panic("INIT out of memory\n");
 		yield();
 		goto survive;
 	}
 	printk("fault:Out of memory\n");
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
+	if (is_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 61a23fff4395..c7b195c7e51f 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -120,7 +120,7 @@ out_nosemaphore:
  * us unable to handle the page fault gracefully.
  */
 out_of_memory:
-	if (current->pid == 1) {
+	if (is_init(current)) {
 		up_read(&mm->mmap_sem);
 		yield();
 		down_read(&mm->mmap_sem);
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 9ba54cc2b5f6..3751b4788e28 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -244,7 +244,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
 
 int unhandled_signal(struct task_struct *tsk, int sig)
 {
-	if (tsk->pid == 1)
+	if (is_init(tsk))
 		return 1;
 	if (tsk->ptrace & PT_PTRACED)
 		return 0;
@@ -580,7 +580,7 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) { 
+	if (is_init(current)) {
 		yield();
 		goto again;
 	}
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index a945a33e85a1..dd0dbec2e57e 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -144,7 +144,7 @@ bad_area:
 	 */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (current->pid == 1) {
+	if (is_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index ee3ca8f1768e..0ad6cb081db4 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -208,7 +208,7 @@ static void send_sig_all(int sig)
 	struct task_struct *p;
 
 	for_each_process(p) {
-		if (p->mm && p->pid != 1)
+		if (p->mm && !is_init(p))
 			/* Not swapper, init nor kernel thread */
 			force_sig(sig, p);
 	}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3696f2f7126d..ed2af8671589 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1033,6 +1033,16 @@ static inline int pid_alive(struct task_struct *p)
 	return p->pids[PIDTYPE_PID].pid != NULL;
 }
 
+/**
+ * is_init - check if a task structure is the first user space
+ *	     task the kernel created.
+ * @p: Task structure to be checked.
+ */
+static inline int is_init(struct task_struct *tsk)
+{
+	return tsk->pid == 1;
+}
+
 extern void free_task(struct task_struct *tsk);
 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
 
diff --git a/kernel/capability.c b/kernel/capability.c
index c7685ad00a97..edb845a6e84a 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -133,7 +133,7 @@ static inline int cap_set_all(kernel_cap_t *effective,
      int found = 0;
 
      do_each_thread(g, target) {
-             if (target == current || target->pid == 1)
+             if (target == current || is_init(target))
                      continue;
              found = 1;
 	     if (security_capset_check(target, effective, inheritable,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1b32c2c04c15..584bb4e6c042 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -240,7 +240,7 @@ static struct super_block *cpuset_sb;
  * A cpuset can only be deleted if both its 'count' of using tasks
  * is zero, and its list of 'children' cpusets is empty.  Since all
  * tasks in the system use _some_ cpuset, and since there is always at
- * least one task in the system (init, pid == 1), therefore, top_cpuset
+ * least one task in the system (init), therefore, top_cpuset
  * always has either children cpusets and/or using tasks.  So we don't
  * need a special hack to ensure that top_cpuset cannot be deleted.
  *
diff --git a/kernel/exit.c b/kernel/exit.c
index 4b6fb054b25d..9961192d6055 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -219,7 +219,7 @@ static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task)
 	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
 		if (p == ignored_task
 				|| p->exit_state
-				|| p->real_parent->pid == 1)
+				|| is_init(p->real_parent))
 			continue;
 		if (process_group(p->real_parent) != pgrp
 			    && p->real_parent->signal->session == p->signal->session) {
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 50087ecf337e..37cad75cf494 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -40,7 +40,7 @@ struct resource crashk_res = {
 
 int kexec_should_crash(struct task_struct *p)
 {
-	if (in_interrupt() || !p->pid || p->pid == 1 || panic_on_oops)
+	if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops)
 		return 1;
 	return 0;
 }
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 8aad0331d82e..4d50e06fd745 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -440,6 +440,7 @@ struct task_struct *ptrace_get_task_struct(pid_t pid)
 	child = find_task_by_pid(pid);
 	if (child)
 		get_task_struct(child);
+
 	read_unlock(&tasklist_lock);
 	if (!child)
 		return ERR_PTR(-ESRCH);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8bfa7d117c54..9535a3839930 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1915,7 +1915,7 @@ int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
 		return -EPERM;
 	}
 
-	op = (current->pid == 1) ? OP_SET : OP_AND;
+	op = is_init(current) ? OP_SET : OP_AND;
 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
 				do_proc_dointvec_bset_conv,&op);
 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index bada3d03119f..f3dd79c1c367 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -255,7 +255,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
  */
 static void __oom_kill_task(struct task_struct *p, const char *message)
 {
-	if (p->pid == 1) {
+	if (is_init(p)) {
 		WARN_ON(1);
 		printk(KERN_WARNING "tried to kill init!\n");
 		return;
diff --git a/security/commoncap.c b/security/commoncap.c
index f50fc298cf80..5a5ef5ca7ea9 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -169,7 +169,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 	/* For init, we want to retain the capabilities set
 	 * in the init_task struct. Thus we skip the usual
 	 * capability rules */
-	if (current->pid != 1) {
+	if (!is_init(current)) {
 		current->cap_permitted = new_permitted;
 		current->cap_effective =
 		    cap_intersect (new_permitted, bprm->cap_effective);
-- 
cgit v1.2.3


From 3ca212b813299899d2968aa0a24a797c3746f5ec Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Fri, 29 Sep 2006 02:00:14 -0700
Subject: [PATCH] Remove another config.h

After the asm/ uses of #include <linux/config.h> this one is the next
biggest source of noise.

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/vmstat.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 176c7f797339..c89df55f6e03 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -3,7 +3,6 @@
 
 #include <linux/types.h>
 #include <linux/percpu.h>
-#include <linux/config.h>
 #include <linux/mmzone.h>
 #include <asm/atomic.h>
 
-- 
cgit v1.2.3


From af410fc13d95f079910fc3dca7496590c3275967 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 29 Sep 2006 02:00:14 -0700
Subject: [PATCH] make leds.h include relevant headers

Make it possible to include linux/leds.h without first including list.h and
spinlock.h.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/leds.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index dc23c7c639f3..88afceffb7cb 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -12,6 +12,9 @@
 #ifndef __LINUX_LEDS_H_INCLUDED
 #define __LINUX_LEDS_H_INCLUDED
 
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
 struct device;
 struct class_device;
 /*
-- 
cgit v1.2.3


From 1711ef3866b0360e102327389fe4b76c849bbe83 Mon Sep 17 00:00:00 2001
From: Toyo Abe <toyoa@mvista.com>
Date: Fri, 29 Sep 2006 02:00:28 -0700
Subject: [PATCH] posix-timers: Fix clock_nanosleep() doesn't return the
 remaining time in compatibility mode

The clock_nanosleep() function does not return the time remaining when the
sleep is interrupted by a signal.

This patch creates a new call out, compat_clock_nanosleep_restart(), which
handles returning the remaining time after a sleep is interrupted.  This
patch revives clock_nanosleep_restart().  It is now accessed via the new
call out.  The compat_clock_nanosleep_restart() is used for compatibility
access.

Since this is implemented in compatibility mode the normal path is
virtually unaffected - no real performance impact.

Signed-off-by: Toyo Abe <toyoa@mvista.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/hrtimer.h      |  1 +
 include/linux/posix-timers.h |  4 ++++
 kernel/compat.c              | 33 +++++++++++++++++++++++++++++++++
 kernel/hrtimer.c             | 20 ++++++++++----------
 kernel/posix-cpu-timers.c    | 17 ++++++++++++-----
 kernel/posix-timers.c        | 21 +++++++++++++++++++++
 6 files changed, 81 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 4fc379de6c2f..fca93025ab51 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -138,6 +138,7 @@ extern long hrtimer_nanosleep(struct timespec *rqtp,
 			      struct timespec __user *rmtp,
 			      const enum hrtimer_mode mode,
 			      const clockid_t clockid);
+extern long hrtimer_nanosleep_restart(struct restart_block *restart_block);
 
 extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
 				 struct task_struct *tsk);
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 95572c434bc9..a7dd38f30ade 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -72,6 +72,7 @@ struct k_clock {
 	int (*timer_create) (struct k_itimer *timer);
 	int (*nsleep) (const clockid_t which_clock, int flags,
 		       struct timespec *, struct timespec __user *);
+	long (*nsleep_restart) (struct restart_block *restart_block);
 	int (*timer_set) (struct k_itimer * timr, int flags,
 			  struct itimerspec * new_setting,
 			  struct itimerspec * old_setting);
@@ -97,6 +98,7 @@ int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *ts);
 int posix_cpu_timer_create(struct k_itimer *timer);
 int posix_cpu_nsleep(const clockid_t which_clock, int flags,
 		     struct timespec *rqtp, struct timespec __user *rmtp);
+long posix_cpu_nsleep_restart(struct restart_block *restart_block);
 int posix_cpu_timer_set(struct k_itimer *timer, int flags,
 			struct itimerspec *new, struct itimerspec *old);
 int posix_cpu_timer_del(struct k_itimer *timer);
@@ -111,4 +113,6 @@ void posix_cpu_timers_exit_group(struct task_struct *task);
 void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 			   cputime_t *newval, cputime_t *oldval);
 
+long clock_nanosleep_restart(struct restart_block *restart_block);
+
 #endif
diff --git a/kernel/compat.c b/kernel/compat.c
index 126dee9530aa..75573e5d27b0 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -22,6 +22,7 @@
 #include <linux/security.h>
 #include <linux/timex.h>
 #include <linux/migrate.h>
+#include <linux/posix-timers.h>
 
 #include <asm/uaccess.h>
 
@@ -601,6 +602,30 @@ long compat_sys_clock_getres(clockid_t which_clock,
 	return err;
 } 
 
+static long compat_clock_nanosleep_restart(struct restart_block *restart)
+{
+	long err;
+	mm_segment_t oldfs;
+	struct timespec tu;
+	struct compat_timespec *rmtp = (struct compat_timespec *)(restart->arg1);
+
+	restart->arg1 = (unsigned long) &tu;
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	err = clock_nanosleep_restart(restart);
+	set_fs(oldfs);
+
+	if ((err == -ERESTART_RESTARTBLOCK) && rmtp &&
+	    put_compat_timespec(&tu, rmtp))
+		return -EFAULT;
+
+	if (err == -ERESTART_RESTARTBLOCK) {
+		restart->fn = compat_clock_nanosleep_restart;
+		restart->arg1 = (unsigned long) rmtp;
+	}
+	return err;
+}
+
 long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
 			    struct compat_timespec __user *rqtp,
 			    struct compat_timespec __user *rmtp)
@@ -608,6 +633,7 @@ long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
 	long err;
 	mm_segment_t oldfs;
 	struct timespec in, out; 
+	struct restart_block *restart;
 
 	if (get_compat_timespec(&in, rqtp)) 
 		return -EFAULT;
@@ -618,9 +644,16 @@ long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
 				  (struct timespec __user *) &in,
 				  (struct timespec __user *) &out);
 	set_fs(oldfs);
+
 	if ((err == -ERESTART_RESTARTBLOCK) && rmtp &&
 	    put_compat_timespec(&out, rmtp))
 		return -EFAULT;
+
+	if (err == -ERESTART_RESTARTBLOCK) {
+		restart = &current_thread_info()->restart_block;
+		restart->fn = compat_clock_nanosleep_restart;
+		restart->arg1 = (unsigned long) rmtp;
+	}
 	return err;	
 } 
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 21c38a7e666b..d0ba190dfeb6 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -693,7 +693,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 	return t->task == NULL;
 }
 
-static long __sched nanosleep_restart(struct restart_block *restart)
+long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 {
 	struct hrtimer_sleeper t;
 	struct timespec __user *rmtp;
@@ -702,13 +702,13 @@ static long __sched nanosleep_restart(struct restart_block *restart)
 
 	restart->fn = do_no_restart_syscall;
 
-	hrtimer_init(&t.timer, restart->arg3, HRTIMER_ABS);
-	t.timer.expires.tv64 = ((u64)restart->arg1 << 32) | (u64) restart->arg0;
+	hrtimer_init(&t.timer, restart->arg0, HRTIMER_ABS);
+	t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2;
 
 	if (do_nanosleep(&t, HRTIMER_ABS))
 		return 0;
 
-	rmtp = (struct timespec __user *) restart->arg2;
+	rmtp = (struct timespec __user *) restart->arg1;
 	if (rmtp) {
 		time = ktime_sub(t.timer.expires, t.timer.base->get_time());
 		if (time.tv64 <= 0)
@@ -718,7 +718,7 @@ static long __sched nanosleep_restart(struct restart_block *restart)
 			return -EFAULT;
 	}
 
-	restart->fn = nanosleep_restart;
+	restart->fn = hrtimer_nanosleep_restart;
 
 	/* The other values in restart are already filled in */
 	return -ERESTART_RESTARTBLOCK;
@@ -751,11 +751,11 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 	}
 
 	restart = &current_thread_info()->restart_block;
-	restart->fn = nanosleep_restart;
-	restart->arg0 = t.timer.expires.tv64 & 0xFFFFFFFF;
-	restart->arg1 = t.timer.expires.tv64 >> 32;
-	restart->arg2 = (unsigned long) rmtp;
-	restart->arg3 = (unsigned long) t.timer.base->index;
+	restart->fn = hrtimer_nanosleep_restart;
+	restart->arg0 = (unsigned long) t.timer.base->index;
+	restart->arg1 = (unsigned long) rmtp;
+	restart->arg2 = t.timer.expires.tv64 & 0xFFFFFFFF;
+	restart->arg3 = t.timer.expires.tv64 >> 32;
 
 	return -ERESTART_RESTARTBLOCK;
 }
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index d38d9ec3276c..5667fef89dd1 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1393,8 +1393,6 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	}
 }
 
-static long posix_cpu_clock_nanosleep_restart(struct restart_block *);
-
 int posix_cpu_nsleep(const clockid_t which_clock, int flags,
 		     struct timespec *rqtp, struct timespec __user *rmtp)
 {
@@ -1471,7 +1469,7 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags,
 		    copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
 			return -EFAULT;
 
-		restart_block->fn = posix_cpu_clock_nanosleep_restart;
+		restart_block->fn = posix_cpu_nsleep_restart;
 		/* Caller already set restart_block->arg1 */
 		restart_block->arg0 = which_clock;
 		restart_block->arg1 = (unsigned long) rmtp;
@@ -1484,8 +1482,7 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags,
 	return error;
 }
 
-static long
-posix_cpu_clock_nanosleep_restart(struct restart_block *restart_block)
+long posix_cpu_nsleep_restart(struct restart_block *restart_block)
 {
 	clockid_t which_clock = restart_block->arg0;
 	struct timespec __user *rmtp;
@@ -1524,6 +1521,10 @@ static int process_cpu_nsleep(const clockid_t which_clock, int flags,
 {
 	return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp);
 }
+static long process_cpu_nsleep_restart(struct restart_block *restart_block)
+{
+	return -EINVAL;
+}
 static int thread_cpu_clock_getres(const clockid_t which_clock,
 				   struct timespec *tp)
 {
@@ -1544,6 +1545,10 @@ static int thread_cpu_nsleep(const clockid_t which_clock, int flags,
 {
 	return -EINVAL;
 }
+static long thread_cpu_nsleep_restart(struct restart_block *restart_block)
+{
+	return -EINVAL;
+}
 
 static __init int init_posix_cpu_timers(void)
 {
@@ -1553,6 +1558,7 @@ static __init int init_posix_cpu_timers(void)
 		.clock_set = do_posix_clock_nosettime,
 		.timer_create = process_cpu_timer_create,
 		.nsleep = process_cpu_nsleep,
+		.nsleep_restart = process_cpu_nsleep_restart,
 	};
 	struct k_clock thread = {
 		.clock_getres = thread_cpu_clock_getres,
@@ -1560,6 +1566,7 @@ static __init int init_posix_cpu_timers(void)
 		.clock_set = do_posix_clock_nosettime,
 		.timer_create = thread_cpu_timer_create,
 		.nsleep = thread_cpu_nsleep,
+		.nsleep_restart = thread_cpu_nsleep_restart,
 	};
 
 	register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index ac6dc8744429..e5ebcc1ec3a0 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -973,3 +973,24 @@ sys_clock_nanosleep(const clockid_t which_clock, int flags,
 	return CLOCK_DISPATCH(which_clock, nsleep,
 			      (which_clock, flags, &t, rmtp));
 }
+
+/*
+ * nanosleep_restart for monotonic and realtime clocks
+ */
+static int common_nsleep_restart(struct restart_block *restart_block)
+{
+	return hrtimer_nanosleep_restart(restart_block);
+}
+
+/*
+ * This will restart clock_nanosleep. This is required only by
+ * compat_clock_nanosleep_restart for now.
+ */
+long
+clock_nanosleep_restart(struct restart_block *restart_block)
+{
+	clockid_t which_clock = restart_block->arg0;
+
+	return CLOCK_DISPATCH(which_clock, nsleep_restart,
+			      (restart_block));
+}
-- 
cgit v1.2.3


From 3171a0305d62e6627a24bff35af4f997e4988a80 Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Fri, 29 Sep 2006 02:00:32 -0700
Subject: [PATCH] simplify update_times (avoid jiffies/jiffies_64 aliasing
 problem)

Pass ticks to do_timer() and update_times(), and adjust x86_64 and s390
timer interrupt handler with this change.

Currently update_times() calculates ticks by "jiffies - wall_jiffies", but
callers of do_timer() should know how many ticks to update.  Passing ticks
get rid of this redundant calculation.  Also there are another redundancy
pointed out by Martin Schwidefsky.

This cleanup make a barrier added by
5aee405c662ca644980c184774277fc6d0769a84 needless.  So this patch removes
it.

As a bonus, this cleanup make wall_jiffies can be removed easily, since now
wall_jiffies is always synced with jiffies.  (This patch does not really
remove wall_jiffies.  It would be another cleanup patch)

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Acked-by: Russell King <rmk@arm.linux.org.uk>
Cc: Ian Molton <spyro@f2s.com>
Cc: Mikael Starvik <starvik@axis.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Hirokazu Takata <takata.hirokazu@renesas.com>
Acked-by: Ralf Baechle <ralf@linux-mips.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp>
Cc: Richard Curnow <rc@rc0.org.uk>
Cc: William Lee Irwin III <wli@holomorphy.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp>
Cc: Chris Zankel <chris@zankel.net>
Acked-by: "Luck, Tony" <tony.luck@intel.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/alpha/kernel/time.c                 |  2 +-
 arch/arm/kernel/time.c                   |  2 +-
 arch/arm26/kernel/time.c                 |  2 +-
 arch/avr32/kernel/time.c                 |  2 +-
 arch/cris/arch-v10/kernel/time.c         |  2 +-
 arch/cris/arch-v32/kernel/time.c         |  2 +-
 arch/frv/kernel/time.c                   |  2 +-
 arch/h8300/kernel/time.c                 |  2 +-
 arch/ia64/kernel/time.c                  |  2 +-
 arch/m32r/kernel/time.c                  |  2 +-
 arch/m68k/kernel/time.c                  |  2 +-
 arch/m68k/sun3/sun3ints.c                |  2 +-
 arch/m68knommu/kernel/time.c             |  2 +-
 arch/mips/au1000/common/time.c           |  6 +++---
 arch/mips/gt64120/common/time.c          |  2 +-
 arch/mips/kernel/time.c                  |  2 +-
 arch/mips/momentum/ocelot_g/gt-irq.c     |  2 +-
 arch/mips/sgi-ip27/ip27-timer.c          |  2 +-
 arch/parisc/kernel/time.c                |  2 +-
 arch/powerpc/kernel/time.c               |  2 +-
 arch/ppc/kernel/time.c                   |  2 +-
 arch/s390/kernel/time.c                  |  9 ++++-----
 arch/sh/kernel/time.c                    |  2 +-
 arch/sh64/kernel/time.c                  |  2 +-
 arch/sparc/kernel/pcic.c                 |  2 +-
 arch/sparc/kernel/time.c                 |  2 +-
 arch/sparc64/kernel/time.c               |  4 ++--
 arch/um/kernel/time.c                    |  2 +-
 arch/v850/kernel/time.c                  |  2 +-
 arch/x86_64/kernel/time.c                |  8 ++++----
 arch/xtensa/kernel/time.c                |  2 +-
 include/asm-arm/arch-clps711x/time.h     |  2 +-
 include/asm-arm/arch-l7200/time.h        |  2 +-
 include/asm-i386/mach-default/do_timer.h |  2 +-
 include/asm-i386/mach-visws/do_timer.h   |  2 +-
 include/asm-i386/mach-voyager/do_timer.h |  2 +-
 include/linux/sched.h                    |  2 +-
 kernel/timer.c                           | 19 ++++++-------------
 38 files changed, 52 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index b191cc759737..7c1e44420a78 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -132,7 +132,7 @@ irqreturn_t timer_interrupt(int irq, void *dev, struct pt_regs * regs)
 	nticks = delta >> FIX_SHIFT;
 
 	while (nticks > 0) {
-		do_timer(regs);
+		do_timer(1);
 #ifndef CONFIG_SMP
 		update_process_times(user_mode(regs));
 #endif
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index d4dceb5f06e9..f7d5165796ef 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -337,7 +337,7 @@ void timer_tick(struct pt_regs *regs)
 	profile_tick(CPU_PROFILING, regs);
 	do_leds();
 	do_set_rtc();
-	do_timer(regs);
+	do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(regs));
 #endif
diff --git a/arch/arm26/kernel/time.c b/arch/arm26/kernel/time.c
index db63d75d0715..80adbd005fc5 100644
--- a/arch/arm26/kernel/time.c
+++ b/arch/arm26/kernel/time.c
@@ -194,7 +194,7 @@ EXPORT_SYMBOL(do_settimeofday);
 
 static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
-        do_timer(regs);
+        do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(regs));
 #endif
diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c
index b0e6b5855a38..3e56b9f4358a 100644
--- a/arch/avr32/kernel/time.c
+++ b/arch/avr32/kernel/time.c
@@ -148,7 +148,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 	 * Call the generic timer interrupt handler
 	 */
 	write_seqlock(&xtime_lock);
-	do_timer(regs);
+	do_timer(1);
 	write_sequnlock(&xtime_lock);
 
 	/*
diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c
index 9c22b76e129a..ebacf1457d91 100644
--- a/arch/cris/arch-v10/kernel/time.c
+++ b/arch/cris/arch-v10/kernel/time.c
@@ -227,7 +227,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 	
 	/* call the real timer interrupt handler */
 
-	do_timer(regs);
+	do_timer(1);
 	
         cris_do_profile(regs); /* Save profiling information */
 
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index 50f3f93293d6..be0a01657d4f 100644
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -219,7 +219,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 		return IRQ_HANDLED;
 
 	/* call the real timer interrupt handler */
-	do_timer(regs);
+	do_timer(1);
 
 	/*
 	 * If we have an externally synchronized Linux clock, then update
diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c
index 3d0284bccb94..7e55884135ed 100644
--- a/arch/frv/kernel/time.c
+++ b/arch/frv/kernel/time.c
@@ -70,7 +70,7 @@ static irqreturn_t timer_interrupt(int irq, void *dummy, struct pt_regs * regs)
 	 */
 	write_seqlock(&xtime_lock);
 
-	do_timer(regs);
+	do_timer(1);
 	update_process_times(user_mode(regs));
 	profile_tick(CPU_PROFILING, regs);
 
diff --git a/arch/h8300/kernel/time.c b/arch/h8300/kernel/time.c
index 688a5100604c..e569d17b4ae6 100644
--- a/arch/h8300/kernel/time.c
+++ b/arch/h8300/kernel/time.c
@@ -41,7 +41,7 @@ static void timer_interrupt(int irq, void *dummy, struct pt_regs * regs)
 	/* may need to kick the hardware timer */
 	platform_timer_eoi();
 
-	do_timer(regs);
+	do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(regs));
 #endif
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 6928ef0d64d8..16262687a103 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -78,7 +78,7 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
 			 * xtime_lock.
 			 */
 			write_seqlock(&xtime_lock);
-			do_timer(regs);
+			do_timer(1);
 			local_cpu_data->itm_next = new_itm;
 			write_sequnlock(&xtime_lock);
 		} else
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c
index ded0be07a476..7a896893cd28 100644
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -202,7 +202,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 #ifndef CONFIG_SMP
 	profile_tick(CPU_PROFILING, regs);
 #endif
-	do_timer(regs);
+	do_timer(1);
 
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(regs));
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 98e4b1adfa29..1072e4946a4a 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -40,7 +40,7 @@ static inline int set_rtc_mmss(unsigned long nowtime)
  */
 static irqreturn_t timer_interrupt(int irq, void *dummy, struct pt_regs * regs)
 {
-	do_timer(regs);
+	do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(regs));
 #endif
diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c
index f18b9d3ef16d..dc4ea7e074a6 100644
--- a/arch/m68k/sun3/sun3ints.c
+++ b/arch/m68k/sun3/sun3ints.c
@@ -65,7 +65,7 @@ static irqreturn_t sun3_int5(int irq, void *dev_id, struct pt_regs *fp)
 #ifdef CONFIG_SUN3
 	intersil_clear();
 #endif
-        do_timer(fp);
+        do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(fp));
 #endif
diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c
index 1db987272220..db1e1ce0a349 100644
--- a/arch/m68knommu/kernel/time.c
+++ b/arch/m68knommu/kernel/time.c
@@ -51,7 +51,7 @@ static irqreturn_t timer_interrupt(int irq, void *dummy, struct pt_regs * regs)
 
 	write_seqlock(&xtime_lock);
 
-	do_timer(regs);
+	do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(regs));
 #endif
diff --git a/arch/mips/au1000/common/time.c b/arch/mips/au1000/common/time.c
index 7fbea1bf7b48..0a067f3113a5 100644
--- a/arch/mips/au1000/common/time.c
+++ b/arch/mips/au1000/common/time.c
@@ -96,7 +96,7 @@ void mips_timer_interrupt(struct pt_regs *regs)
 		timerlo = count;
 
 		kstat_this_cpu.irqs[irq]++;
-		do_timer(regs);
+		do_timer(1);
 #ifndef CONFIG_SMP
 		update_process_times(user_mode(regs));
 #endif
@@ -137,7 +137,7 @@ irqreturn_t counter0_irq(int irq, void *dev_id, struct pt_regs *regs)
 	}
 
 	while (time_elapsed > 0) {
-		do_timer(regs);
+		do_timer(1);
 #ifndef CONFIG_SMP
 		update_process_times(user_mode(regs));
 #endif
@@ -156,7 +156,7 @@ irqreturn_t counter0_irq(int irq, void *dev_id, struct pt_regs *regs)
 
 	if (jiffie_drift >= 999) {
 		jiffie_drift -= 999;
-		do_timer(regs); /* increment jiffies by one */
+		do_timer(1); /* increment jiffies by one */
 #ifndef CONFIG_SMP
 		update_process_times(user_mode(regs));
 #endif
diff --git a/arch/mips/gt64120/common/time.c b/arch/mips/gt64120/common/time.c
index d837b26fbe51..7feca49350d1 100644
--- a/arch/mips/gt64120/common/time.c
+++ b/arch/mips/gt64120/common/time.c
@@ -34,7 +34,7 @@ static void gt64120_irq(int irq, void *dev_id, struct pt_regs *regs)
 	if (irq_src & 0x00000800) {	/* Check for timer interrupt */
 		handled = 1;
 		irq_src &= ~0x00000800;
-		do_timer(regs);
+		do_timer(1);
 #ifndef CONFIG_SMP
 		update_process_times(user_mode(regs));
 #endif
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index 170cb67f4ede..6ab8d975a974 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -434,7 +434,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 	/*
 	 * call the generic timer interrupt handling
 	 */
-	do_timer(regs);
+	do_timer(1);
 
 	/*
 	 * If we have an externally synchronized Linux clock, then update
diff --git a/arch/mips/momentum/ocelot_g/gt-irq.c b/arch/mips/momentum/ocelot_g/gt-irq.c
index 9fb2493fff02..6cd87cf0195a 100644
--- a/arch/mips/momentum/ocelot_g/gt-irq.c
+++ b/arch/mips/momentum/ocelot_g/gt-irq.c
@@ -133,7 +133,7 @@ static irqreturn_t gt64240_p0int_irq(int irq, void *dev, struct pt_regs *regs)
 		MV_WRITE(TIMER_COUNTER_0_3_INTERRUPT_CAUSE, 0x0);
 
 		/* handle the timer call */
-		do_timer(regs);
+		do_timer(1);
 #ifndef CONFIG_SMP
 		update_process_times(user_mode(regs));
 #endif
diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c
index b029ba79c27a..c62a3a9ef867 100644
--- a/arch/mips/sgi-ip27/ip27-timer.c
+++ b/arch/mips/sgi-ip27/ip27-timer.c
@@ -111,7 +111,7 @@ again:
 	kstat_this_cpu.irqs[irq]++;		/* kstat only for bootcpu? */
 
 	if (cpu == 0)
-		do_timer(regs);
+		do_timer(1);
 
 	update_process_times(user_mode(regs));
 
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 5facc9bff4ef..700df10924dd 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -79,7 +79,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 #endif
 		if (cpu == 0) {
 			write_seqlock(&xtime_lock);
-			do_timer(regs);
+			do_timer(1);
 			write_sequnlock(&xtime_lock);
 		}
 	}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 7a3c3f791ade..71f71da98e7d 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -693,7 +693,7 @@ void timer_interrupt(struct pt_regs * regs)
 		tb_next_jiffy = tb_last_jiffy + tb_ticks_per_jiffy;
 		if (per_cpu(last_jiffy, cpu) >= tb_next_jiffy) {
 			tb_last_jiffy = tb_next_jiffy;
-			do_timer(regs);
+			do_timer(1);
 			timer_recalc_offset(tb_last_jiffy);
 			timer_check_rtc();
 		}
diff --git a/arch/ppc/kernel/time.c b/arch/ppc/kernel/time.c
index 6ab8cc7226ab..1e1f31554767 100644
--- a/arch/ppc/kernel/time.c
+++ b/arch/ppc/kernel/time.c
@@ -153,7 +153,7 @@ void timer_interrupt(struct pt_regs * regs)
 		/* We are in an interrupt, no need to save/restore flags */
 		write_seqlock(&xtime_lock);
 		tb_last_stamp = jiffy_stamp;
-		do_timer(regs);
+		do_timer(1);
 
 		/*
 		 * update the rtc when needed, this should be performed on the
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 1981c6199fa2..abab42e9f5f8 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -166,7 +166,7 @@ EXPORT_SYMBOL(do_settimeofday);
 void account_ticks(struct pt_regs *regs)
 {
 	__u64 tmp;
-	__u32 ticks, xticks;
+	__u32 ticks;
 
 	/* Calculate how many ticks have passed. */
 	if (S390_lowcore.int_clock < S390_lowcore.jiffy_timer) {
@@ -204,6 +204,7 @@ void account_ticks(struct pt_regs *regs)
 	 */
 	write_seqlock(&xtime_lock);
 	if (S390_lowcore.jiffy_timer > xtime_cc) {
+		__u32 xticks;
 		tmp = S390_lowcore.jiffy_timer - xtime_cc;
 		if (tmp >= 2*CLK_TICKS_PER_JIFFY) {
 			xticks = __div(tmp, CLK_TICKS_PER_JIFFY);
@@ -212,13 +213,11 @@ void account_ticks(struct pt_regs *regs)
 			xticks = 1;
 			xtime_cc += CLK_TICKS_PER_JIFFY;
 		}
-		while (xticks--)
-			do_timer(regs);
+		do_timer(xticks);
 	}
 	write_sequnlock(&xtime_lock);
 #else
-	for (xticks = ticks; xticks > 0; xticks--)
-		do_timer(regs);
+	do_timer(ticks);
 #endif
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index 149d9713eddf..f664a196c4f5 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -117,7 +117,7 @@ static long last_rtc_update;
  */
 void handle_timer_tick(struct pt_regs *regs)
 {
-	do_timer(regs);
+	do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(regs));
 #endif
diff --git a/arch/sh64/kernel/time.c b/arch/sh64/kernel/time.c
index b8162e59030e..3b61e06f9d72 100644
--- a/arch/sh64/kernel/time.c
+++ b/arch/sh64/kernel/time.c
@@ -298,7 +298,7 @@ static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
 	asm ("getcon cr62, %0" : "=r" (current_ctc));
 	ctc_last_interrupt = (unsigned long) current_ctc;
 
-	do_timer(regs);
+	do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(regs));
 #endif
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index bfd31aac2df3..e19b1bad9bc5 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -712,7 +712,7 @@ static irqreturn_t pcic_timer_handler (int irq, void *h, struct pt_regs *regs)
 {
 	write_seqlock(&xtime_lock);	/* Dummy, to show that we remember */
 	pcic_clear_clock_irq();
-	do_timer(regs);
+	do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(regs));
 #endif
diff --git a/arch/sparc/kernel/time.c b/arch/sparc/kernel/time.c
index 845081b01267..6f84fa1b58e5 100644
--- a/arch/sparc/kernel/time.c
+++ b/arch/sparc/kernel/time.c
@@ -128,7 +128,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs)
 #endif
 	clear_clock_irq();
 
-	do_timer(regs);
+	do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(regs));
 #endif
diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index b0b4feeec098..ca1193482f07 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -465,7 +465,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs)
 		profile_tick(CPU_PROFILING, regs);
 		update_process_times(user_mode(regs));
 #endif
-		do_timer(regs);
+		do_timer(1);
 
 		/* Guarantee that the following sequences execute
 		 * uninterrupted.
@@ -496,7 +496,7 @@ void timer_tick_interrupt(struct pt_regs *regs)
 {
 	write_seqlock(&xtime_lock);
 
-	do_timer(regs);
+	do_timer(1);
 
 	timer_check_rtc();
 
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 820affbf3e16..a92965f8f9cd 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -93,7 +93,7 @@ irqreturn_t um_timer(int irq, void *dev, struct pt_regs *regs)
 
 	write_seqlock_irqsave(&xtime_lock, flags);
 
-	do_timer(regs);
+	do_timer(1);
 
 	nsecs = get_time();
 	xtime.tv_sec = nsecs / NSEC_PER_SEC;
diff --git a/arch/v850/kernel/time.c b/arch/v850/kernel/time.c
index a0b46695f186..f4d1a4d3cdc2 100644
--- a/arch/v850/kernel/time.c
+++ b/arch/v850/kernel/time.c
@@ -51,7 +51,7 @@ static irqreturn_t timer_interrupt (int irq, void *dummy, struct pt_regs *regs)
 	if (mach_tick)
 	  mach_tick ();
 
-	do_timer (regs);
+	do_timer (1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(regs));
 #endif
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 1c255ee76e7c..7ea3bf2a858c 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -415,16 +415,16 @@ void main_timer_handler(struct pt_regs *regs)
 				(((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
 	}
 
-	if (lost > 0) {
+	if (lost > 0)
 		handle_lost_ticks(lost, regs);
-		jiffies += lost;
-	}
+	else
+		lost = 0;
 
 /*
  * Do the timer stuff.
  */
 
-	do_timer(regs);
+	do_timer(lost + 1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(regs));
 #endif
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 412ab32de391..241db201f40e 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -175,7 +175,7 @@ again:
 
 		last_ccount_stamp = next;
 		next += CCOUNT_PER_JIFFY;
-		do_timer (regs); /* Linux handler in kernel/timer.c */
+		do_timer (1); /* Linux handler in kernel/timer.c */
 
 		if (ntp_synced() &&
 		    xtime.tv_sec - last_rtc_update >= 659 &&
diff --git a/include/asm-arm/arch-clps711x/time.h b/include/asm-arm/arch-clps711x/time.h
index 9cb27cd4e6ae..0e4a3901d3b3 100644
--- a/include/asm-arm/arch-clps711x/time.h
+++ b/include/asm-arm/arch-clps711x/time.h
@@ -29,7 +29,7 @@ static irqreturn_t
 p720t_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
 	do_leds();
-	do_timer(regs);
+	do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(regs));
 #endif
diff --git a/include/asm-arm/arch-l7200/time.h b/include/asm-arm/arch-l7200/time.h
index 7b98b533e63a..c69cb508735f 100644
--- a/include/asm-arm/arch-l7200/time.h
+++ b/include/asm-arm/arch-l7200/time.h
@@ -45,7 +45,7 @@
 static irqreturn_t
 timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
-	do_timer(regs);
+	do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode(regs));
 #endif
diff --git a/include/asm-i386/mach-default/do_timer.h b/include/asm-i386/mach-default/do_timer.h
index 6312c3e79814..4182c347ef85 100644
--- a/include/asm-i386/mach-default/do_timer.h
+++ b/include/asm-i386/mach-default/do_timer.h
@@ -16,7 +16,7 @@
 
 static inline void do_timer_interrupt_hook(struct pt_regs *regs)
 {
-	do_timer(regs);
+	do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode_vm(regs));
 #endif
diff --git a/include/asm-i386/mach-visws/do_timer.h b/include/asm-i386/mach-visws/do_timer.h
index 95568e6ca91c..8db618c5a72b 100644
--- a/include/asm-i386/mach-visws/do_timer.h
+++ b/include/asm-i386/mach-visws/do_timer.h
@@ -9,7 +9,7 @@ static inline void do_timer_interrupt_hook(struct pt_regs *regs)
 	/* Clear the interrupt */
 	co_cpu_write(CO_CPU_STAT,co_cpu_read(CO_CPU_STAT) & ~CO_STAT_TIMEINTR);
 
-	do_timer(regs);
+	do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode_vm(regs));
 #endif
diff --git a/include/asm-i386/mach-voyager/do_timer.h b/include/asm-i386/mach-voyager/do_timer.h
index eaf518098981..099fe9f5c1b2 100644
--- a/include/asm-i386/mach-voyager/do_timer.h
+++ b/include/asm-i386/mach-voyager/do_timer.h
@@ -3,7 +3,7 @@
 
 static inline void do_timer_interrupt_hook(struct pt_regs *regs)
 {
-	do_timer(regs);
+	do_timer(1);
 #ifndef CONFIG_SMP
 	update_process_times(user_mode_vm(regs));
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ed2af8671589..503dea61ff99 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1206,7 +1206,7 @@ extern void switch_uid(struct user_struct *);
 
 #include <asm/current.h>
 
-extern void do_timer(struct pt_regs *);
+extern void do_timer(unsigned long ticks);
 
 extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
 extern int FASTCALL(wake_up_process(struct task_struct * tsk));
diff --git a/kernel/timer.c b/kernel/timer.c
index a2cb1ecb1b28..4f55622b0d38 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1222,10 +1222,8 @@ static inline void calc_load(unsigned long ticks)
 	unsigned long active_tasks; /* fixed-point */
 	static int count = LOAD_FREQ;
 
-	count -= ticks;
-	if (count < 0) {
-		count += LOAD_FREQ;
-		active_tasks = count_active_tasks();
+	active_tasks = count_active_tasks();
+	for (count -= ticks; count < 0; count += LOAD_FREQ) {
 		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
 		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
 		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
@@ -1270,11 +1268,8 @@ void run_local_timers(void)
  * Called by the timer interrupt. xtime_lock must already be taken
  * by the timer IRQ!
  */
-static inline void update_times(void)
+static inline void update_times(unsigned long ticks)
 {
-	unsigned long ticks;
-
-	ticks = jiffies - wall_jiffies;
 	wall_jiffies += ticks;
 	update_wall_time();
 	calc_load(ticks);
@@ -1286,12 +1281,10 @@ static inline void update_times(void)
  * jiffies is defined in the linker script...
  */
 
-void do_timer(struct pt_regs *regs)
+void do_timer(unsigned long ticks)
 {
-	jiffies_64++;
-	/* prevent loading jiffies before storing new jiffies_64 value. */
-	barrier();
-	update_times();
+	jiffies_64 += ticks;
+	update_times(ticks);
 }
 
 #ifdef __ARCH_WANT_SYS_ALARM
-- 
cgit v1.2.3


From 5785c95baede8459d70c4aa0f7becb6e8b5fde4b Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Fri, 29 Sep 2006 02:00:43 -0700
Subject: [PATCH] tty: make termios_sem a mutex

[akpm@osdl.org: fix]
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/tty_io.c    | 23 ++++++++++++-----------
 drivers/char/tty_ioctl.c | 17 +++++++++--------
 include/linux/tty.h      |  2 +-
 3 files changed, 22 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index b4f37c65b95c..142427c6e8f3 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -618,9 +618,9 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string_flags);
  
 static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
 {
-	down(&tty->termios_sem);
+	mutex_lock(&tty->termios_mutex);
 	tty->termios->c_line = num;
-	up(&tty->termios_sem);
+	mutex_unlock(&tty->termios_mutex);
 }
 
 /*
@@ -1338,9 +1338,9 @@ static void do_tty_hangup(void *data)
 	 */
 	if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS)
 	{
-		down(&tty->termios_sem);
+		mutex_lock(&tty->termios_mutex);
 		*tty->termios = tty->driver->init_termios;
-		up(&tty->termios_sem);
+		mutex_unlock(&tty->termios_mutex);
 	}
 	
 	/* Defer ldisc switch */
@@ -2750,9 +2750,9 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user * arg)
 {
 	int err;
 
-	down(&tty->termios_sem);
+	mutex_lock(&tty->termios_mutex);
 	err = copy_to_user(arg, &tty->winsize, sizeof(*arg));
-	up(&tty->termios_sem);
+	mutex_unlock(&tty->termios_mutex);
 
 	return err ? -EFAULT: 0;
 }
@@ -2782,14 +2782,15 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
 	if (copy_from_user(&tmp_ws, arg, sizeof(*arg)))
 		return -EFAULT;
 
-	down(&tty->termios_sem);
+	mutex_lock(&tty->termios_mutex);
 	if (!memcmp(&tmp_ws, &tty->winsize, sizeof(*arg)))
 		goto done;
 
 #ifdef CONFIG_VT
 	if (tty->driver->type == TTY_DRIVER_TYPE_CONSOLE) {
-		if (vc_lock_resize(tty->driver_data, tmp_ws.ws_col, tmp_ws.ws_row)) {
-			up(&tty->termios_sem);
+		if (vc_lock_resize(tty->driver_data, tmp_ws.ws_col,
+					tmp_ws.ws_row)) {
+			mutex_unlock(&tty->termios_mutex);
  			return -ENXIO;
 		}
 	}
@@ -2801,7 +2802,7 @@ static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty,
 	tty->winsize = tmp_ws;
 	real_tty->winsize = tmp_ws;
 done:
-	up(&tty->termios_sem);
+	mutex_unlock(&tty->termios_mutex);
 	return 0;
 }
 
@@ -3576,7 +3577,7 @@ static void initialize_tty_struct(struct tty_struct *tty)
 	tty_buffer_init(tty);
 	INIT_WORK(&tty->buf.work, flush_to_ldisc, tty);
 	init_MUTEX(&tty->buf.pty_sem);
-	init_MUTEX(&tty->termios_sem);
+	mutex_init(&tty->termios_mutex);
 	init_waitqueue_head(&tty->write_wait);
 	init_waitqueue_head(&tty->read_wait);
 	INIT_WORK(&tty->hangup_work, do_tty_hangup, tty);
diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
index 4ad47d321bd4..4d540619ac84 100644
--- a/drivers/char/tty_ioctl.c
+++ b/drivers/char/tty_ioctl.c
@@ -20,6 +20,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/bitops.h>
+#include <linux/mutex.h>
 
 #include <asm/io.h>
 #include <asm/uaccess.h>
@@ -131,7 +132,7 @@ static void change_termios(struct tty_struct * tty, struct termios * new_termios
 
 	/* FIXME: we need to decide on some locking/ordering semantics
 	   for the set_termios notification eventually */
-	down(&tty->termios_sem);
+	mutex_lock(&tty->termios_mutex);
 
 	*tty->termios = *new_termios;
 	unset_locked_termios(tty->termios, &old_termios, tty->termios_locked);
@@ -176,7 +177,7 @@ static void change_termios(struct tty_struct * tty, struct termios * new_termios
 			(ld->set_termios)(tty, &old_termios);
 		tty_ldisc_deref(ld);
 	}
-	up(&tty->termios_sem);
+	mutex_unlock(&tty->termios_mutex);
 }
 
 /**
@@ -284,13 +285,13 @@ static int get_sgttyb(struct tty_struct * tty, struct sgttyb __user * sgttyb)
 {
 	struct sgttyb tmp;
 
-	down(&tty->termios_sem);
+	mutex_lock(&tty->termios_mutex);
 	tmp.sg_ispeed = 0;
 	tmp.sg_ospeed = 0;
 	tmp.sg_erase = tty->termios->c_cc[VERASE];
 	tmp.sg_kill = tty->termios->c_cc[VKILL];
 	tmp.sg_flags = get_sgflags(tty);
-	up(&tty->termios_sem);
+	mutex_unlock(&tty->termios_mutex);
 	
 	return copy_to_user(sgttyb, &tmp, sizeof(tmp)) ? -EFAULT : 0;
 }
@@ -345,12 +346,12 @@ static int set_sgttyb(struct tty_struct * tty, struct sgttyb __user * sgttyb)
 	if (copy_from_user(&tmp, sgttyb, sizeof(tmp)))
 		return -EFAULT;
 
-	down(&tty->termios_sem);		
+	mutex_lock(&tty->termios_mutex);
 	termios =  *tty->termios;
 	termios.c_cc[VERASE] = tmp.sg_erase;
 	termios.c_cc[VKILL] = tmp.sg_kill;
 	set_sgflags(&termios, tmp.sg_flags);
-	up(&tty->termios_sem);
+	mutex_unlock(&tty->termios_mutex);
 	change_termios(tty, &termios);
 	return 0;
 }
@@ -592,11 +593,11 @@ int n_tty_ioctl(struct tty_struct * tty, struct file * file,
 		case TIOCSSOFTCAR:
 			if (get_user(arg, (unsigned int __user *) arg))
 				return -EFAULT;
-			down(&tty->termios_sem);
+			mutex_lock(&tty->termios_mutex);
 			tty->termios->c_cflag =
 				((tty->termios->c_cflag & ~CLOCAL) |
 				 (arg ? CLOCAL : 0));
-			up(&tty->termios_sem);
+			mutex_unlock(&tty->termios_mutex);
 			return 0;
 		default:
 			return -ENOIOCTLCMD;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index d1dec3d0c814..ea4c2605f8da 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -174,7 +174,7 @@ struct tty_struct {
 	struct tty_driver *driver;
 	int index;
 	struct tty_ldisc ldisc;
-	struct semaphore termios_sem;
+	struct mutex termios_mutex;
 	struct termios *termios, *termios_locked;
 	char name[64];
 	int pgrp;
-- 
cgit v1.2.3


From 416bc51292f977b43b010c6dd937522b90062390 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Fri, 29 Sep 2006 02:00:45 -0700
Subject: [PATCH] Use decimal for PTRACE_ATTACH and PTRACE_DETACH.

It is sure confusing that linux/ptrace.h has:
	#define PTRACE_SINGLESTEP	   9
	#define PTRACE_ATTACH		0x10
	#define PTRACE_DETACH		0x11
	#define PTRACE_SYSCALL		  24
All the low-numbered constants are in decimal, but the last two in hex.
It sure makes it likely that someone will look at this and think that
9, 10, 11 are used, and that 16 and 17 are not used.

How about we use the same notation for all the numbers [0,24] in the
same short list?

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/ptrace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 8b2749a259dc..eeb1976ef7bf 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -16,8 +16,8 @@
 #define PTRACE_KILL		   8
 #define PTRACE_SINGLESTEP	   9
 
-#define PTRACE_ATTACH		0x10
-#define PTRACE_DETACH		0x11
+#define PTRACE_ATTACH		  16
+#define PTRACE_DETACH		  17
 
 #define PTRACE_SYSCALL		  24
 
-- 
cgit v1.2.3


From 57a6f51c4281aa3119975473c70dce0480d322bd Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 29 Sep 2006 02:00:49 -0700
Subject: [PATCH] introduce is_rt_policy() helper

Imho, makes the code a bit easier to read.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 4 ++--
 kernel/sched.c        | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 503dea61ff99..fbc69cc3923d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -504,8 +504,8 @@ struct signal_struct {
 #define rt_prio(prio)		unlikely((prio) < MAX_RT_PRIO)
 #define rt_task(p)		rt_prio((p)->prio)
 #define batch_task(p)		(unlikely((p)->policy == SCHED_BATCH))
-#define has_rt_policy(p) \
-	unlikely((p)->policy != SCHED_NORMAL && (p)->policy != SCHED_BATCH)
+#define is_rt_policy(p)		((p) != SCHED_NORMAL && (p) != SCHED_BATCH)
+#define has_rt_policy(p)	unlikely(is_rt_policy((p)->policy))
 
 /*
  * Some day this will be a full-fledged user tracking system..
diff --git a/kernel/sched.c b/kernel/sched.c
index f9b3c6a414f1..c3c718aea618 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4109,8 +4109,7 @@ recheck:
 	    (p->mm && param->sched_priority > MAX_USER_RT_PRIO-1) ||
 	    (!p->mm && param->sched_priority > MAX_RT_PRIO-1))
 		return -EINVAL;
-	if ((policy == SCHED_NORMAL || policy == SCHED_BATCH)
-					!= (param->sched_priority == 0))
+	if (is_rt_policy(policy) != (param->sched_priority != 0))
 		return -EINVAL;
 
 	/*
@@ -4134,7 +4133,7 @@ recheck:
 				!rlim_rtprio)
 			return -EPERM;
 		/* can't increase priority */
-		if ((policy != SCHED_NORMAL && policy != SCHED_BATCH) &&
+		if (is_rt_policy(policy) &&
 		    param->sched_priority > p->rt_priority &&
 		    param->sched_priority > rlim_rtprio)
 			return -EPERM;
-- 
cgit v1.2.3


From 9f50b93f066f8dc339de9b0eb78a22a75e6c8f8f Mon Sep 17 00:00:00 2001
From: Josh Triplett <josht@us.ibm.com>
Date: Fri, 29 Sep 2006 02:00:59 -0700
Subject: [PATCH] Make spinlock/rwlock annotations more accurate by using
 parameters, not types

The lock annotations used on spinlocks and rwlocks currently use
__{acquires,releases}(spinlock_t) and __{acquires,releases}(rwlock_t),
respectively.  This loses the information of which lock actually got
acquired or released, and assumes a different type for the parameter of
__acquires and __releases than the rest of the kernel.  While the current
implementations of __acquires and __releases throw away their argument,
this will not always remain the case.  Change this to use the lock
parameter instead, to preserve this information and increase consistency in
usage of __acquires and __releases.

Signed-off-by: Josh Triplett <josh@freedesktop.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/spinlock_api_smp.h | 50 ++++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index b2c4f8299464..8828b8155e9c 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -19,41 +19,41 @@ int in_lock_functions(unsigned long addr);
 
 #define assert_spin_locked(x)	BUG_ON(!spin_is_locked(x))
 
-void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(spinlock_t);
+void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(lock);
 void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
-							__acquires(spinlock_t);
-void __lockfunc _read_lock(rwlock_t *lock)		__acquires(rwlock_t);
-void __lockfunc _write_lock(rwlock_t *lock)		__acquires(rwlock_t);
-void __lockfunc _spin_lock_bh(spinlock_t *lock)		__acquires(spinlock_t);
-void __lockfunc _read_lock_bh(rwlock_t *lock)		__acquires(rwlock_t);
-void __lockfunc _write_lock_bh(rwlock_t *lock)		__acquires(rwlock_t);
-void __lockfunc _spin_lock_irq(spinlock_t *lock)	__acquires(spinlock_t);
-void __lockfunc _read_lock_irq(rwlock_t *lock)		__acquires(rwlock_t);
-void __lockfunc _write_lock_irq(rwlock_t *lock)		__acquires(rwlock_t);
+							__acquires(lock);
+void __lockfunc _read_lock(rwlock_t *lock)		__acquires(lock);
+void __lockfunc _write_lock(rwlock_t *lock)		__acquires(lock);
+void __lockfunc _spin_lock_bh(spinlock_t *lock)		__acquires(lock);
+void __lockfunc _read_lock_bh(rwlock_t *lock)		__acquires(lock);
+void __lockfunc _write_lock_bh(rwlock_t *lock)		__acquires(lock);
+void __lockfunc _spin_lock_irq(spinlock_t *lock)	__acquires(lock);
+void __lockfunc _read_lock_irq(rwlock_t *lock)		__acquires(lock);
+void __lockfunc _write_lock_irq(rwlock_t *lock)		__acquires(lock);
 unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
-							__acquires(spinlock_t);
+							__acquires(lock);
 unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
-							__acquires(rwlock_t);
+							__acquires(lock);
 unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
-							__acquires(rwlock_t);
+							__acquires(lock);
 int __lockfunc _spin_trylock(spinlock_t *lock);
 int __lockfunc _read_trylock(rwlock_t *lock);
 int __lockfunc _write_trylock(rwlock_t *lock);
 int __lockfunc _spin_trylock_bh(spinlock_t *lock);
-void __lockfunc _spin_unlock(spinlock_t *lock)		__releases(spinlock_t);
-void __lockfunc _read_unlock(rwlock_t *lock)		__releases(rwlock_t);
-void __lockfunc _write_unlock(rwlock_t *lock)		__releases(rwlock_t);
-void __lockfunc _spin_unlock_bh(spinlock_t *lock)	__releases(spinlock_t);
-void __lockfunc _read_unlock_bh(rwlock_t *lock)		__releases(rwlock_t);
-void __lockfunc _write_unlock_bh(rwlock_t *lock)	__releases(rwlock_t);
-void __lockfunc _spin_unlock_irq(spinlock_t *lock)	__releases(spinlock_t);
-void __lockfunc _read_unlock_irq(rwlock_t *lock)	__releases(rwlock_t);
-void __lockfunc _write_unlock_irq(rwlock_t *lock)	__releases(rwlock_t);
+void __lockfunc _spin_unlock(spinlock_t *lock)		__releases(lock);
+void __lockfunc _read_unlock(rwlock_t *lock)		__releases(lock);
+void __lockfunc _write_unlock(rwlock_t *lock)		__releases(lock);
+void __lockfunc _spin_unlock_bh(spinlock_t *lock)	__releases(lock);
+void __lockfunc _read_unlock_bh(rwlock_t *lock)		__releases(lock);
+void __lockfunc _write_unlock_bh(rwlock_t *lock)	__releases(lock);
+void __lockfunc _spin_unlock_irq(spinlock_t *lock)	__releases(lock);
+void __lockfunc _read_unlock_irq(rwlock_t *lock)	__releases(lock);
+void __lockfunc _write_unlock_irq(rwlock_t *lock)	__releases(lock);
 void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
-							__releases(spinlock_t);
+							__releases(lock);
 void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
-							__releases(rwlock_t);
+							__releases(lock);
 void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
-							__releases(rwlock_t);
+							__releases(lock);
 
 #endif /* __LINUX_SPINLOCK_API_SMP_H */
-- 
cgit v1.2.3


From 303912e2a32aa73785b4c4dee15466d944a38a46 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josht@us.ibm.com>
Date: Fri, 29 Sep 2006 02:01:00 -0700
Subject: [PATCH] Replace _spin_trylock with spin_trylock in the IRQ variants
 to use __cond_lock

spin_trylock_irq and spin_trylock_irqsave use _spin_trylock, which does not
use the __cond_lock wrapper annotation and thus does not affect the lock
context; change them to use spin_trylock instead, which does use
__cond_lock.

Signed-off-by: Josh Triplett <josh@freedesktop.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/spinlock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 31473db92d3b..456e74f0e129 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -241,14 +241,14 @@ do {								\
 #define spin_trylock_irq(lock) \
 ({ \
 	local_irq_disable(); \
-	_spin_trylock(lock) ? \
+	spin_trylock(lock) ? \
 	1 : ({ local_irq_enable(); 0;  }); \
 })
 
 #define spin_trylock_irqsave(lock, flags) \
 ({ \
 	local_irq_save(flags); \
-	_spin_trylock(lock) ? \
+	spin_trylock(lock) ? \
 	1 : ({ local_irq_restore(flags); 0; }); \
 })
 
-- 
cgit v1.2.3


From dcc8e559ee5ae03fa6bdb8611d76d86d0083e793 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josht@us.ibm.com>
Date: Fri, 29 Sep 2006 02:01:03 -0700
Subject: [PATCH] Pass a lock expression to __cond_lock, like __acquire and
 __release

Currently, __acquire and __release take a lock expression, but __cond_lock
takes only a condition, not the lock acquired if the expression evaluates
to true.  Change __cond_lock to accept a lock expression, and change all
the callers to pass in a lock expression.

Signed-off-by: Josh Triplett <josh@freedesktop.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/compiler.h |  4 ++--
 include/linux/spinlock.h | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 060b96112ec6..0780de440220 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -14,7 +14,7 @@
 # define __releases(x)	__attribute__((context(1,0)))
 # define __acquire(x)	__context__(1)
 # define __release(x)	__context__(-1)
-# define __cond_lock(x)	((x) ? ({ __context__(1); 1; }) : 0)
+# define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
 extern void __chk_user_ptr(void __user *);
 extern void __chk_io_ptr(void __iomem *);
 #else
@@ -31,7 +31,7 @@ extern void __chk_io_ptr(void __iomem *);
 # define __releases(x)
 # define __acquire(x) (void)0
 # define __release(x) (void)0
-# define __cond_lock(x) (x)
+# define __cond_lock(x,c) (c)
 #endif
 
 #ifdef __KERNEL__
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 456e74f0e129..b800d2d68b32 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -167,9 +167,9 @@ do {								\
  * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The various
  * methods are defined as nops in the case they are not required.
  */
-#define spin_trylock(lock)		__cond_lock(_spin_trylock(lock))
-#define read_trylock(lock)		__cond_lock(_read_trylock(lock))
-#define write_trylock(lock)		__cond_lock(_write_trylock(lock))
+#define spin_trylock(lock)		__cond_lock(lock, _spin_trylock(lock))
+#define read_trylock(lock)		__cond_lock(lock, _read_trylock(lock))
+#define write_trylock(lock)		__cond_lock(lock, _write_trylock(lock))
 
 #define spin_lock(lock)			_spin_lock(lock)
 
@@ -236,7 +236,7 @@ do {								\
 					_write_unlock_irqrestore(lock, flags)
 #define write_unlock_bh(lock)		_write_unlock_bh(lock)
 
-#define spin_trylock_bh(lock)		__cond_lock(_spin_trylock_bh(lock))
+#define spin_trylock_bh(lock)	__cond_lock(lock, _spin_trylock_bh(lock))
 
 #define spin_trylock_irq(lock) \
 ({ \
@@ -264,7 +264,7 @@ do {								\
  */
 extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
 #define atomic_dec_and_lock(atomic, lock) \
-		__cond_lock(_atomic_dec_and_lock(atomic, lock))
+		__cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
 
 /**
  * spin_can_lock - would spin_trylock() succeed?
-- 
cgit v1.2.3


From 368bdb3d616fa352971f45b423ae6344715e620b Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 29 Sep 2006 02:01:05 -0700
Subject: [PATCH] cramfs: make cramfs_uncompress_exit() return void

It always returns 0, so relying on it is useless.  The only caller isn't
checking return value.  In general, un-, de-, -free functions should return
void.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cramfs/uncompress.c    | 3 +--
 include/linux/cramfs_fs.h | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c
index 8def89f2c438..fc3ccb74626f 100644
--- a/fs/cramfs/uncompress.c
+++ b/fs/cramfs/uncompress.c
@@ -68,11 +68,10 @@ int cramfs_uncompress_init(void)
 	return 0;
 }
 
-int cramfs_uncompress_exit(void)
+void cramfs_uncompress_exit(void)
 {
 	if (!--initialized) {
 		zlib_inflateEnd(&stream);
 		vfree(stream.workspace);
 	}
-	return 0;
 }
diff --git a/include/linux/cramfs_fs.h b/include/linux/cramfs_fs.h
index a41f38428c37..1dba681e428d 100644
--- a/include/linux/cramfs_fs.h
+++ b/include/linux/cramfs_fs.h
@@ -87,6 +87,6 @@ struct cramfs_super {
 /* Uncompression interfaces to the underlying zlib */
 int cramfs_uncompress_block(void *dst, int dstlen, void *src, int srclen);
 int cramfs_uncompress_init(void);
-int cramfs_uncompress_exit(void);
+void cramfs_uncompress_exit(void);
 
 #endif
-- 
cgit v1.2.3


From e8106b941ceab68cc5ff713df7b1276484554584 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 29 Sep 2006 02:01:08 -0700
Subject: [PATCH] lockdep: core, add enable/disable_irq_irqsave/irqrestore()
 APIs

Introduce the disable_irq_nosync_lockdep_irqsave() and
enable_irq_lockdep_irqrestore() APIs.  These are needed for NE2000; basically
NE2000 calls disable_irq and enable_irq as locking against the IRQ handler,
but both in cases where interrupts are on and off.  This means that lockdep
needs to track the old state of the virtual irq flags on disable_irq, and
restore these at enable_irq time.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/net/8390.c        |  6 +++---
 include/linux/interrupt.h | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/8390.c b/drivers/net/8390.c
index 5b6b05ed8f3c..9d34056147ad 100644
--- a/drivers/net/8390.c
+++ b/drivers/net/8390.c
@@ -299,7 +299,7 @@ static int ei_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 *	Slow phase with lock held.
 	 */
 
-	disable_irq_nosync_lockdep(dev->irq);
+	disable_irq_nosync_lockdep_irqsave(dev->irq, &flags);
 
 	spin_lock(&ei_local->page_lock);
 
@@ -338,7 +338,7 @@ static int ei_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		netif_stop_queue(dev);
 		outb_p(ENISR_ALL, e8390_base + EN0_IMR);
 		spin_unlock(&ei_local->page_lock);
-		enable_irq_lockdep(dev->irq);
+		enable_irq_lockdep_irqrestore(dev->irq, &flags);
 		ei_local->stat.tx_errors++;
 		return 1;
 	}
@@ -379,7 +379,7 @@ static int ei_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	outb_p(ENISR_ALL, e8390_base + EN0_IMR);
 
 	spin_unlock(&ei_local->page_lock);
-	enable_irq_lockdep(dev->irq);
+	enable_irq_lockdep_irqrestore(dev->irq, &flags);
 
 	dev_kfree_skb (skb);
 	ei_local->stat.tx_bytes += send_length;
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index d5afee95fd43..1f97e3d92639 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -123,6 +123,14 @@ static inline void disable_irq_nosync_lockdep(unsigned int irq)
 #endif
 }
 
+static inline void disable_irq_nosync_lockdep_irqsave(unsigned int irq, unsigned long *flags)
+{
+	disable_irq_nosync(irq);
+#ifdef CONFIG_LOCKDEP
+	local_irq_save(*flags);
+#endif
+}
+
 static inline void disable_irq_lockdep(unsigned int irq)
 {
 	disable_irq(irq);
@@ -139,6 +147,14 @@ static inline void enable_irq_lockdep(unsigned int irq)
 	enable_irq(irq);
 }
 
+static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long *flags)
+{
+#ifdef CONFIG_LOCKDEP
+	local_irq_restore(*flags);
+#endif
+	enable_irq(irq);
+}
+
 /* IRQ wakeup (PM) control: */
 extern int set_irq_wake(unsigned int irq, unsigned int on);
 
-- 
cgit v1.2.3


From 55a101f8f71a3d3dbda7b5c77083ffe47552f831 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 29 Sep 2006 02:01:10 -0700
Subject: [PATCH] kill PF_DEAD flag

After the previous change (->flags & PF_DEAD) <=> (->state == EXIT_DEAD), we
don't need PF_DEAD any longer.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h |  1 -
 kernel/exit.c         |  6 ++----
 kernel/sched.c        | 16 ++++++++--------
 mm/oom_kill.c         |  4 ++--
 4 files changed, 12 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fbc69cc3923d..9763de334f09 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1061,7 +1061,6 @@ static inline void put_task_struct(struct task_struct *t)
 					/* Not implemented yet, only for 486*/
 #define PF_STARTING	0x00000002	/* being created */
 #define PF_EXITING	0x00000004	/* getting shut down */
-#define PF_DEAD		0x00000008	/* Dead */
 #define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
 #define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
 #define PF_DUMPCORE	0x00000200	/* dumped core */
diff --git a/kernel/exit.c b/kernel/exit.c
index 3d759c98fb11..9dd5f1336da2 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -953,10 +953,8 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
-	/* PF_DEAD causes final put_task_struct after we schedule. */
 	preempt_disable();
-	BUG_ON(tsk->flags & PF_DEAD);
-	tsk->flags |= PF_DEAD;
+	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = EXIT_DEAD;
 
 	schedule();
@@ -972,7 +970,7 @@ NORET_TYPE void complete_and_exit(struct completion *comp, long code)
 {
 	if (comp)
 		complete(comp);
-	
+
 	do_exit(code);
 }
 
diff --git a/kernel/sched.c b/kernel/sched.c
index e1646b044b69..a9405d7cc6ab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1755,27 +1755,27 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct mm_struct *mm = rq->prev_mm;
-	unsigned long prev_task_flags;
+	long prev_state;
 
 	rq->prev_mm = NULL;
 
 	/*
 	 * A task struct has one reference for the use as "current".
-	 * If a task dies, then it sets EXIT_ZOMBIE in tsk->exit_state and
-	 * calls schedule one last time. The schedule call will never return,
-	 * and the scheduled task must drop that reference.
-	 * The test for EXIT_ZOMBIE must occur while the runqueue locks are
+	 * If a task dies, then it sets EXIT_DEAD in tsk->state and calls
+	 * schedule one last time. The schedule call will never return, and
+	 * the scheduled task must drop that reference.
+	 * The test for EXIT_DEAD must occur while the runqueue locks are
 	 * still held, otherwise prev could be scheduled on another cpu, die
 	 * there before we look at prev->state, and then the reference would
 	 * be dropped twice.
 	 *		Manfred Spraul <manfred@colorfullife.com>
 	 */
-	prev_task_flags = prev->flags;
+	prev_state = prev->state;
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
 	if (mm)
 		mmdrop(mm);
-	if (unlikely(prev_task_flags & PF_DEAD)) {
+	if (unlikely(prev_state == EXIT_DEAD)) {
 		/*
 		 * Remove function-return probe instances associated with this
 		 * task and put them back on the free list.
@@ -5153,7 +5153,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
 	BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
 
 	/* Cannot have done final schedule yet: would have vanished. */
-	BUG_ON(p->flags & PF_DEAD);
+	BUG_ON(p->state == EXIT_DEAD);
 
 	get_task_struct(p);
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f3dd79c1c367..202f186a753a 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -226,8 +226,8 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
 		releasing = test_tsk_thread_flag(p, TIF_MEMDIE) ||
 						p->flags & PF_EXITING;
 		if (releasing) {
-			/* PF_DEAD tasks have already released their mm */
-			if (p->flags & PF_DEAD)
+			/* TASK_DEAD tasks have already released their mm */
+			if (p->state == EXIT_DEAD)
 				continue;
 			if (p->flags & PF_EXITING && p == current) {
 				chosen = p;
-- 
cgit v1.2.3


From c394cc9fbb367f87faa2228ec2eabacd2d4701c6 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 29 Sep 2006 02:01:11 -0700
Subject: [PATCH] introduce TASK_DEAD state

I am not sure about this patch, I am asking Ingo to take a decision.

task_struct->state == EXIT_DEAD is a very special case, to avoid a confusion
it makes sense to introduce a new state, TASK_DEAD, while EXIT_DEAD should
live only in ->exit_state as documented in sched.h.

Note that this state is not visible to user-space, get_task_state() masks off
unsuitable states.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 1 +
 kernel/exit.c         | 2 +-
 kernel/sched.c        | 8 ++++----
 mm/oom_kill.c         | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9763de334f09..a06fc89cf6e5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -148,6 +148,7 @@ extern unsigned long weighted_cpuload(const int cpu);
 #define EXIT_DEAD		32
 /* in tsk->state again */
 #define TASK_NONINTERACTIVE	64
+#define TASK_DEAD		128
 
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
diff --git a/kernel/exit.c b/kernel/exit.c
index 9dd5f1336da2..2e4c13cba95a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -955,7 +955,7 @@ fastcall NORET_TYPE void do_exit(long code)
 
 	preempt_disable();
 	/* causes final put_task_struct in finish_task_switch(). */
-	tsk->state = EXIT_DEAD;
+	tsk->state = TASK_DEAD;
 
 	schedule();
 	BUG();
diff --git a/kernel/sched.c b/kernel/sched.c
index a9405d7cc6ab..74f169ac0773 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1761,10 +1761,10 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 
 	/*
 	 * A task struct has one reference for the use as "current".
-	 * If a task dies, then it sets EXIT_DEAD in tsk->state and calls
+	 * If a task dies, then it sets TASK_DEAD in tsk->state and calls
 	 * schedule one last time. The schedule call will never return, and
 	 * the scheduled task must drop that reference.
-	 * The test for EXIT_DEAD must occur while the runqueue locks are
+	 * The test for TASK_DEAD must occur while the runqueue locks are
 	 * still held, otherwise prev could be scheduled on another cpu, die
 	 * there before we look at prev->state, and then the reference would
 	 * be dropped twice.
@@ -1775,7 +1775,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	finish_lock_switch(rq, prev);
 	if (mm)
 		mmdrop(mm);
-	if (unlikely(prev_state == EXIT_DEAD)) {
+	if (unlikely(prev_state == TASK_DEAD)) {
 		/*
 		 * Remove function-return probe instances associated with this
 		 * task and put them back on the free list.
@@ -5153,7 +5153,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
 	BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
 
 	/* Cannot have done final schedule yet: would have vanished. */
-	BUG_ON(p->state == EXIT_DEAD);
+	BUG_ON(p->state == TASK_DEAD);
 
 	get_task_struct(p);
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 202f186a753a..21f0a7e8e514 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -227,7 +227,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
 						p->flags & PF_EXITING;
 		if (releasing) {
 			/* TASK_DEAD tasks have already released their mm */
-			if (p->state == EXIT_DEAD)
+			if (p->state == TASK_DEAD)
 				continue;
 			if (p->flags & PF_EXITING && p == current) {
 				chosen = p;
-- 
cgit v1.2.3


From 38837fc75acb7fa9b0e111b0241fe4fe76c5d4b3 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Fri, 29 Sep 2006 02:01:16 -0700
Subject: [PATCH] cpuset: top_cpuset tracks hotplug changes to node_online_map

Change the list of memory nodes allowed to tasks in the top (root) nodeset
to dynamically track what cpus are online, using a call to a cpuset hook
from the memory hotplug code.  Make this top cpus file read-only.

On systems that have cpusets configured in their kernel, but that aren't
actively using cpusets (for some distros, this covers the majority of
systems) all tasks end up in the top cpuset.

If that system does support memory hotplug, then these tasks cannot make
use of memory nodes that are added after system boot, because the memory
nodes are not allowed in the top cpuset.  This is a surprising regression
over earlier kernels that didn't have cpusets enabled.

One key motivation for this change is to remain consistent with the
behaviour for the top_cpuset's 'cpus', which is also read-only, and which
automatically tracks the cpu_online_map.

This change also has the minor benefit that it fixes a long standing,
little noticed, minor bug in cpusets.  The cpuset performance tweak to
short circuit the cpuset_zone_allowed() check on systems with just a single
cpuset (see 'number_of_cpusets', in linux/cpuset.h) meant that simply
changing the 'mems' of the top_cpuset had no affect, even though the change
(the write system call) appeared to succeed.  With the following change,
that write to the 'mems' file fails -EACCES, and the 'mems' file stubbornly
refuses to be changed via user space writes.  Thus no one should be mislead
into thinking they've changed the top_cpusets's 'mems' when in affect they
haven't.

In order to keep the behaviour of cpusets consistent between systems
actively making use of them and systems not using them, this patch changes
the behaviour of the 'mems' file in the top (root) cpuset, making it read
only, and making it automatically track the value of node_online_map.  Thus
tasks in the top cpuset will have automatic use of hot plugged memory nodes
allowed by their cpuset.

[akpm@osdl.org: build fix]
[bunk@stusta.de: build fix]
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/cpusets.txt | 10 +++++-----
 include/linux/cpuset.h    |  4 ++++
 kernel/cpuset.c           | 28 +++++++++++++++++++++++++---
 mm/memory_hotplug.c       |  3 +++
 4 files changed, 37 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index 76b44290c154..842f0d1ab216 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -217,11 +217,11 @@ exclusive cpuset.  Also, the use of a Linux virtual file system (vfs)
 to represent the cpuset hierarchy provides for a familiar permission
 and name space for cpusets, with a minimum of additional kernel code.
 
-The cpus file in the root (top_cpuset) cpuset is read-only.
-It automatically tracks the value of cpu_online_map, using a CPU
-hotplug notifier.  If and when memory nodes can be hotplugged,
-we expect to make the mems file in the root cpuset read-only
-as well, and have it track the value of node_online_map.
+The cpus and mems files in the root (top_cpuset) cpuset are
+read-only.  The cpus file automatically tracks the value of
+cpu_online_map using a CPU hotplug notifier, and the mems file
+automatically tracks the value of node_online_map using the
+cpuset_track_online_nodes() hook.
 
 
 1.4 What are exclusive cpusets ?
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 9354722a9217..4d8adf663681 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -63,6 +63,8 @@ static inline int cpuset_do_slab_mem_spread(void)
 	return current->flags & PF_SPREAD_SLAB;
 }
 
+extern void cpuset_track_online_nodes(void);
+
 #else /* !CONFIG_CPUSETS */
 
 static inline int cpuset_init_early(void) { return 0; }
@@ -126,6 +128,8 @@ static inline int cpuset_do_slab_mem_spread(void)
 	return 0;
 }
 
+static inline void cpuset_track_online_nodes(void) {}
+
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 584bb4e6c042..794af5024c2f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -912,6 +912,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 	int fudge;
 	int retval;
 
+	/* top_cpuset.mems_allowed tracks node_online_map; it's read-only */
+	if (cs == &top_cpuset)
+		return -EACCES;
+
 	trialcs = *cs;
 	retval = nodelist_parse(buf, trialcs.mems_allowed);
 	if (retval < 0)
@@ -2042,9 +2046,8 @@ out:
  * (of no affect) on systems that are actively using CPU hotplug
  * but making no active use of cpusets.
  *
- * This handles CPU hotplug (cpuhp) events.  If someday Memory
- * Nodes can be hotplugged (dynamically changing node_online_map)
- * then we should handle that too, perhaps in a similar way.
+ * This routine ensures that top_cpuset.cpus_allowed tracks
+ * cpu_online_map on each CPU hotplug (cpuhp) event.
  */
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -2063,6 +2066,25 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb,
 }
 #endif
 
+/*
+ * Keep top_cpuset.mems_allowed tracking node_online_map.
+ * Call this routine anytime after you change node_online_map.
+ * See also the previous routine cpuset_handle_cpuhp().
+ */
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void cpuset_track_online_nodes()
+{
+	mutex_lock(&manage_mutex);
+	mutex_lock(&callback_mutex);
+
+	top_cpuset.mems_allowed = node_online_map;
+
+	mutex_unlock(&callback_mutex);
+	mutex_unlock(&manage_mutex);
+}
+#endif
+
 /**
  * cpuset_init_smp - initialize cpus_allowed
  *
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c37319542b70..9576ed920c0a 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -21,6 +21,7 @@
 #include <linux/highmem.h>
 #include <linux/vmalloc.h>
 #include <linux/ioport.h>
+#include <linux/cpuset.h>
 
 #include <asm/tlbflush.h>
 
@@ -283,6 +284,8 @@ int add_memory(int nid, u64 start, u64 size)
 	/* we online node here. we can't roll back from here. */
 	node_set_online(nid);
 
+	cpuset_track_online_nodes();
+
 	if (new_pgdat) {
 		ret = register_one_node(nid);
 		/*
-- 
cgit v1.2.3


From 2d1d43f6a43b703587e759145f69467e7c6553a7 Mon Sep 17 00:00:00 2001
From: Chandra Seetharaman <sekharan@us.ibm.com>
Date: Fri, 29 Sep 2006 02:01:25 -0700
Subject: [PATCH] call mm/page-writeback.c:set_ratelimit() when new pages are
 hot-added

ratelimit_pages in page-writeback.c is recalculated (in set_ratelimit())
every time a CPU is hot-added/removed.  But this value is not recalculated
when new pages are hot-added.

This patch fixes that problem by calling set_ratelimit() when new pages
are hot-added.

[akpm@osdl.org: cleanups]
Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/writeback.h | 1 +
 mm/memory_hotplug.c       | 2 ++
 mm/page-writeback.c       | 6 +++---
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 56a23a0e7f2e..9d4074ecd0cd 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -117,6 +117,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping,
 int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
 			   loff_t pos, loff_t count);
 void set_page_dirty_balance(struct page *page);
+void writeback_set_ratelimit(void);
 
 /* pdflush.c */
 extern int nr_pdflush_threads;	/* Global so it can be exported to sysctl
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9576ed920c0a..2053bb165a21 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -13,6 +13,7 @@
 #include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/pagevec.h>
+#include <linux/writeback.h>
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
@@ -192,6 +193,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	if (need_zonelists_rebuild)
 		build_all_zonelists();
 	vm_total_pages = nr_free_pagecache_pages();
+	writeback_set_ratelimit();
 	return 0;
 }
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index efd2705e4986..488b7088557c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -501,7 +501,7 @@ void laptop_sync_completion(void)
  * will write six megabyte chunks, max.
  */
 
-static void set_ratelimit(void)
+void writeback_set_ratelimit(void)
 {
 	ratelimit_pages = vm_total_pages / (num_online_cpus() * 32);
 	if (ratelimit_pages < 16)
@@ -513,7 +513,7 @@ static void set_ratelimit(void)
 static int __cpuinit
 ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
 {
-	set_ratelimit();
+	writeback_set_ratelimit();
 	return 0;
 }
 
@@ -546,7 +546,7 @@ void __init page_writeback_init(void)
 			vm_dirty_ratio = 1;
 	}
 	mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
-	set_ratelimit();
+	writeback_set_ratelimit();
 	register_cpu_notifier(&ratelimit_nb);
 }
 
-- 
cgit v1.2.3


From 04b1db9fd7eea63c9663072feece616ea41b0a79 Mon Sep 17 00:00:00 2001
From: "Ian S. Nelson" <nelsonian@comcast.net>
Date: Fri, 29 Sep 2006 02:01:31 -0700
Subject: [PATCH] /sys/modules: allow full length section names

I've been using systemtap for some debugging and I noticed that it can't
probe a lot of modules.  Turns out it's kind of silly, the sections section
of /sys/module is limited to 32byte filenames and many of the actual
sections are a a bit longer than that.

[akpm@osdl.org: rewrite to use dymanic allocation]
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 CREDITS                |  3 ++-
 include/linux/module.h |  4 ++--
 kernel/module.c        | 26 ++++++++++++++++++++------
 3 files changed, 24 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/CREDITS b/CREDITS
index 8feb2bb49e35..66e82466dde8 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2478,7 +2478,8 @@ S: Derbyshire DE4 3RL
 S: United Kingdom
 
 N: Ian S. Nelson
-E: ian.nelson@echostar.com
+E: nelsonis@earthlink.net
+P: 1024D/00D3D983 3EFD 7B86 B888 D7E2 29B6  9E97 576F 1B97 00D3 D983
 D: Minor mmap and ide hacks
 S: 1370 Atlantis Ave.
 S: Lafayette CO, 80026
diff --git a/include/linux/module.h b/include/linux/module.h
index d4486cc2e7fe..2c599175c583 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -232,17 +232,17 @@ enum module_state
 };
 
 /* Similar stuff for section attributes. */
-#define MODULE_SECT_NAME_LEN 32
 struct module_sect_attr
 {
 	struct module_attribute mattr;
-	char name[MODULE_SECT_NAME_LEN];
+	char *name;
 	unsigned long address;
 };
 
 struct module_sect_attrs
 {
 	struct attribute_group grp;
+	int nsections;
 	struct module_sect_attr attrs[0];
 };
 
diff --git a/kernel/module.c b/kernel/module.c
index b7fe6e840963..05625d5dc758 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -933,6 +933,15 @@ static ssize_t module_sect_show(struct module_attribute *mattr,
 	return sprintf(buf, "0x%lx\n", sattr->address);
 }
 
+static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
+{
+	int section;
+
+	for (section = 0; section < sect_attrs->nsections; section++)
+		kfree(sect_attrs->attrs[section].name);
+	kfree(sect_attrs);
+}
+
 static void add_sect_attrs(struct module *mod, unsigned int nsect,
 		char *secstrings, Elf_Shdr *sechdrs)
 {
@@ -949,21 +958,26 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
 			+ nloaded * sizeof(sect_attrs->attrs[0]),
 			sizeof(sect_attrs->grp.attrs[0]));
 	size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.attrs[0]);
-	if (! (sect_attrs = kmalloc(size[0] + size[1], GFP_KERNEL)))
+	sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL);
+	if (sect_attrs == NULL)
 		return;
 
 	/* Setup section attributes. */
 	sect_attrs->grp.name = "sections";
 	sect_attrs->grp.attrs = (void *)sect_attrs + size[0];
 
+	sect_attrs->nsections = 0;
 	sattr = &sect_attrs->attrs[0];
 	gattr = &sect_attrs->grp.attrs[0];
 	for (i = 0; i < nsect; i++) {
 		if (! (sechdrs[i].sh_flags & SHF_ALLOC))
 			continue;
 		sattr->address = sechdrs[i].sh_addr;
-		strlcpy(sattr->name, secstrings + sechdrs[i].sh_name,
-			MODULE_SECT_NAME_LEN);
+		sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,
+					GFP_KERNEL);
+		if (sattr->name == NULL)
+			goto out;
+		sect_attrs->nsections++;
 		sattr->mattr.show = module_sect_show;
 		sattr->mattr.store = NULL;
 		sattr->mattr.attr.name = sattr->name;
@@ -979,7 +993,7 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
 	mod->sect_attrs = sect_attrs;
 	return;
   out:
-	kfree(sect_attrs);
+	free_sect_attrs(sect_attrs);
 }
 
 static void remove_sect_attrs(struct module *mod)
@@ -989,13 +1003,13 @@ static void remove_sect_attrs(struct module *mod)
 				   &mod->sect_attrs->grp);
 		/* We are positive that no one is using any sect attrs
 		 * at this point.  Deallocate immediately. */
-		kfree(mod->sect_attrs);
+		free_sect_attrs(mod->sect_attrs);
 		mod->sect_attrs = NULL;
 	}
 }
 
-
 #else
+
 static inline void add_sect_attrs(struct module *mod, unsigned int nsect,
 		char *sectstrings, Elf_Shdr *sechdrs)
 {
-- 
cgit v1.2.3


From f0c8bd164e1a0585d7e46896553136b4f488bd19 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Fri, 29 Sep 2006 02:01:34 -0700
Subject: [PATCH] Generic infrastructure for acls

The patches solve the following problem: We want to grant access to devices
based on who is logged in from where, etc.  This includes switching back and
forth between multiple user sessions, etc.

Using ACLs to define device access for logged-in users gives us all the
flexibility we need in order to fully solve the problem.

Device special files nowadays usually live on tmpfs, hence tmpfs ACLs.

Different distros have come up with solutions that solve the problem to
different degrees: SUSE uses a resource manager which tracks login sessions
and sets ACLs on device inodes as appropriate.  RedHat uses pam_console, which
changes the primary file ownership to the logged-in user.  Others use a set of
groups that users must be in in order to be granted the appropriate accesses.

The freedesktop.org project plans to implement a combination of a
console-tracker and a HAL-device-list based solution to grant access to
devices to users, and more distros will likely follow this approach.

These patches have first been posted here on 2 February 2005, and again
on 8 January 2006. We have been shipping them in SLES9 and SLES10 with
no problems reported.  The previous submission is archived here:

   http://lkml.org/lkml/2006/1/8/229
   http://lkml.org/lkml/2006/1/8/230
   http://lkml.org/lkml/2006/1/8/231

This patch:

Add some infrastructure for access control lists on in-memory
filesystems such as tmpfs.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Kconfig                  |   4 +
 fs/Makefile                 |   1 +
 fs/generic_acl.c            | 197 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/generic_acl.h |  36 ++++++++
 4 files changed, 238 insertions(+)
 create mode 100644 fs/generic_acl.c
 create mode 100644 include/linux/generic_acl.h

(limited to 'include/linux')

diff --git a/fs/Kconfig b/fs/Kconfig
index d311198bba43..deb9eec9f6ee 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1940,6 +1940,10 @@ config 9P_FS
 
 	  If unsure, say N.
 
+config GENERIC_ACL
+	bool
+	select FS_POSIX_ACL
+
 endmenu
 
 menu "Partition Types"
diff --git a/fs/Makefile b/fs/Makefile
index 89135428a539..46b8cfe497b2 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_BINFMT_FLAT)	+= binfmt_flat.o
 obj-$(CONFIG_FS_MBCACHE)	+= mbcache.o
 obj-$(CONFIG_FS_POSIX_ACL)	+= posix_acl.o xattr_acl.o
 obj-$(CONFIG_NFS_COMMON)	+= nfs_common/
+obj-$(CONFIG_GENERIC_ACL)	+= generic_acl.o
 
 obj-$(CONFIG_QUOTA)		+= dquot.o
 obj-$(CONFIG_QFMT_V1)		+= quota_v1.o
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
new file mode 100644
index 000000000000..9ccb78947171
--- /dev/null
+++ b/fs/generic_acl.c
@@ -0,0 +1,197 @@
+/*
+ * fs/generic_acl.c
+ *
+ * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/generic_acl.h>
+
+/**
+ * generic_acl_list  -  Generic xattr_handler->list() operation
+ * @ops:	Filesystem specific getacl and setacl callbacks
+ */
+size_t
+generic_acl_list(struct inode *inode, struct generic_acl_operations *ops,
+		 int type, char *list, size_t list_size)
+{
+	struct posix_acl *acl;
+	const char *name;
+	size_t size;
+
+	acl = ops->getacl(inode, type);
+	if (!acl)
+		return 0;
+	posix_acl_release(acl);
+
+	switch(type) {
+		case ACL_TYPE_ACCESS:
+			name = POSIX_ACL_XATTR_ACCESS;
+			break;
+
+		case ACL_TYPE_DEFAULT:
+			name = POSIX_ACL_XATTR_DEFAULT;
+			break;
+
+		default:
+			return 0;
+	}
+	size = strlen(name) + 1;
+	if (list && size <= list_size)
+		memcpy(list, name, size);
+	return size;
+}
+
+/**
+ * generic_acl_get  -  Generic xattr_handler->get() operation
+ * @ops:	Filesystem specific getacl and setacl callbacks
+ */
+int
+generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
+		int type, void *buffer, size_t size)
+{
+	struct posix_acl *acl;
+	int error;
+
+	acl = ops->getacl(inode, type);
+	if (!acl)
+		return -ENODATA;
+	error = posix_acl_to_xattr(acl, buffer, size);
+	posix_acl_release(acl);
+
+	return error;
+}
+
+/**
+ * generic_acl_set  -  Generic xattr_handler->set() operation
+ * @ops:	Filesystem specific getacl and setacl callbacks
+ */
+int
+generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
+		int type, const void *value, size_t size)
+{
+	struct posix_acl *acl = NULL;
+	int error;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+	if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
+		return -EPERM;
+	if (value) {
+		acl = posix_acl_from_xattr(value, size);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+	}
+	if (acl) {
+		mode_t mode;
+
+		error = posix_acl_valid(acl);
+		if (error)
+			goto failed;
+		switch(type) {
+			case ACL_TYPE_ACCESS:
+				mode = inode->i_mode;
+				error = posix_acl_equiv_mode(acl, &mode);
+				if (error < 0)
+					goto failed;
+				inode->i_mode = mode;
+				if (error == 0) {
+					posix_acl_release(acl);
+					acl = NULL;
+				}
+				break;
+
+			case ACL_TYPE_DEFAULT:
+				if (!S_ISDIR(inode->i_mode)) {
+					error = -EINVAL;
+					goto failed;
+				}
+				break;
+		}
+	}
+	ops->setacl(inode, type, acl);
+	error = 0;
+failed:
+	posix_acl_release(acl);
+	return error;
+}
+
+/**
+ * generic_acl_init  -  Take care of acl inheritance at @inode create time
+ * @ops:	Filesystem specific getacl and setacl callbacks
+ *
+ * Files created inside a directory with a default ACL inherit the
+ * directory's default ACL.
+ */
+int
+generic_acl_init(struct inode *inode, struct inode *dir,
+		 struct generic_acl_operations *ops)
+{
+	struct posix_acl *acl = NULL;
+	mode_t mode = inode->i_mode;
+	int error;
+
+	inode->i_mode = mode & ~current->fs->umask;
+	if (!S_ISLNK(inode->i_mode))
+		acl = ops->getacl(dir, ACL_TYPE_DEFAULT);
+	if (acl) {
+		struct posix_acl *clone;
+
+		if (S_ISDIR(inode->i_mode)) {
+			clone = posix_acl_clone(acl, GFP_KERNEL);
+			error = -ENOMEM;
+			if (!clone)
+				goto cleanup;
+			ops->setacl(inode, ACL_TYPE_DEFAULT, clone);
+			posix_acl_release(clone);
+		}
+		clone = posix_acl_clone(acl, GFP_KERNEL);
+		error = -ENOMEM;
+		if (!clone)
+			goto cleanup;
+		error = posix_acl_create_masq(clone, &mode);
+		if (error >= 0) {
+			inode->i_mode = mode;
+			if (error > 0)
+				ops->setacl(inode, ACL_TYPE_ACCESS, clone);
+		}
+		posix_acl_release(clone);
+	}
+	error = 0;
+
+cleanup:
+	posix_acl_release(acl);
+	return error;
+}
+
+/**
+ * generic_acl_chmod  -  change the access acl of @inode upon chmod()
+ * @ops:	FIlesystem specific getacl and setacl callbacks
+ *
+ * A chmod also changes the permissions of the owner, group/mask, and
+ * other ACL entries.
+ */
+int
+generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops)
+{
+	struct posix_acl *acl, *clone;
+	int error = 0;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+	acl = ops->getacl(inode, ACL_TYPE_ACCESS);
+	if (acl) {
+		clone = posix_acl_clone(acl, GFP_KERNEL);
+		posix_acl_release(acl);
+		if (!clone)
+			return -ENOMEM;
+		error = posix_acl_chmod_masq(clone, inode->i_mode);
+		if (!error)
+			ops->setacl(inode, ACL_TYPE_ACCESS, clone);
+		posix_acl_release(clone);
+	}
+	return error;
+}
diff --git a/include/linux/generic_acl.h b/include/linux/generic_acl.h
new file mode 100644
index 000000000000..80764f40be75
--- /dev/null
+++ b/include/linux/generic_acl.h
@@ -0,0 +1,36 @@
+/*
+ * fs/generic_acl.c
+ *
+ * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef GENERIC_ACL_H
+#define GENERIC_ACL_H
+
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+
+/**
+ * struct generic_acl_operations  -  filesystem operations
+ *
+ * Filesystems must make these operations available to the generic
+ * operations.
+ */
+struct generic_acl_operations {
+	struct posix_acl *(*getacl)(struct inode *, int);
+	void (*setacl)(struct inode *, int, struct posix_acl *);
+};
+
+size_t generic_acl_list(struct inode *, struct generic_acl_operations *, int,
+			char *, size_t);
+int generic_acl_get(struct inode *, struct generic_acl_operations *, int,
+		    void *, size_t);
+int generic_acl_set(struct inode *, struct generic_acl_operations *, int,
+		    const void *, size_t);
+int generic_acl_init(struct inode *, struct inode *,
+		     struct generic_acl_operations *);
+int generic_acl_chmod(struct inode *, struct generic_acl_operations *);
+
+#endif
-- 
cgit v1.2.3


From 39f0247d3823e4e0bf8f6838a10362864b1e1053 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Fri, 29 Sep 2006 02:01:35 -0700
Subject: [PATCH] Access Control Lists for tmpfs

Add access control lists for tmpfs.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/Kconfig               |  13 ++++
 include/linux/shmem_fs.h |  24 ++++++
 mm/Makefile              |   1 +
 mm/shmem.c               |  99 +++++++++++++++++++++++-
 mm/shmem_acl.c           | 197 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 332 insertions(+), 2 deletions(-)
 create mode 100644 mm/shmem_acl.c

(limited to 'include/linux')

diff --git a/fs/Kconfig b/fs/Kconfig
index deb9eec9f6ee..4fd9efac29ab 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -881,6 +881,19 @@ config TMPFS
 
 	  See <file:Documentation/filesystems/tmpfs.txt> for details.
 
+config TMPFS_POSIX_ACL
+	bool "Tmpfs POSIX Access Control Lists"
+	depends on TMPFS
+	select GENERIC_ACL
+	help
+	  POSIX Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the POSIX ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N.
+
 config HUGETLBFS
 	bool "HugeTLB file system support"
 	depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index c057f0b32318..f3c51899117f 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -19,6 +19,10 @@ struct shmem_inode_info {
 	swp_entry_t		i_direct[SHMEM_NR_DIRECT]; /* first blocks */
 	struct list_head	swaplist;	/* chain of maybes on swap */
 	struct inode		vfs_inode;
+#ifdef CONFIG_TMPFS_POSIX_ACL
+	struct posix_acl	*i_acl;
+	struct posix_acl	*i_default_acl;
+#endif
 };
 
 struct shmem_sb_info {
@@ -36,4 +40,24 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
 	return container_of(inode, struct shmem_inode_info, vfs_inode);
 }
 
+#ifdef CONFIG_TMPFS_POSIX_ACL
+int shmem_permission(struct inode *, int, struct nameidata *);
+int shmem_acl_init(struct inode *, struct inode *);
+void shmem_acl_destroy_inode(struct inode *);
+
+extern struct xattr_handler shmem_xattr_acl_access_handler;
+extern struct xattr_handler shmem_xattr_acl_default_handler;
+
+extern struct generic_acl_operations shmem_acl_ops;
+
+#else
+static inline int shmem_acl_init(struct inode *inode, struct inode *dir)
+{
+	return 0;
+}
+static inline void shmem_acl_destroy_inode(struct inode *inode)
+{
+}
+#endif  /* CONFIG_TMPFS_POSIX_ACL */
+
 #endif
diff --git a/mm/Makefile b/mm/Makefile
index 60c56c0b5e10..6200c6d6afd2 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA) 	+= mempolicy.o
 obj-$(CONFIG_SPARSEMEM)	+= sparse.o
 obj-$(CONFIG_SHMEM) += shmem.o
+obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
 obj-$(CONFIG_SLOB) += slob.o
 obj-$(CONFIG_SLAB) += slab.o
diff --git a/mm/shmem.c b/mm/shmem.c
index eda907c3a86a..b96de69f236b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -26,6 +26,8 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/fs.h>
+#include <linux/xattr.h>
+#include <linux/generic_acl.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/file.h>
@@ -177,6 +179,7 @@ static const struct address_space_operations shmem_aops;
 static struct file_operations shmem_file_operations;
 static struct inode_operations shmem_inode_operations;
 static struct inode_operations shmem_dir_inode_operations;
+static struct inode_operations shmem_special_inode_operations;
 static struct vm_operations_struct shmem_vm_ops;
 
 static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
@@ -637,7 +640,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
 	struct page *page = NULL;
 	int error;
 
-	if (attr->ia_valid & ATTR_SIZE) {
+	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
 		if (attr->ia_size < inode->i_size) {
 			/*
 			 * If truncating down to a partial page, then
@@ -670,6 +673,10 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
 	error = inode_change_ok(inode, attr);
 	if (!error)
 		error = inode_setattr(inode, attr);
+#ifdef CONFIG_TMPFS_POSIX_ACL
+	if (!error && (attr->ia_valid & ATTR_MODE))
+		error = generic_acl_chmod(inode, &shmem_acl_ops);
+#endif
 	if (page)
 		page_cache_release(page);
 	return error;
@@ -1362,6 +1369,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
 
 		switch (mode & S_IFMT) {
 		default:
+			inode->i_op = &shmem_special_inode_operations;
 			init_special_inode(inode, mode, dev);
 			break;
 		case S_IFREG:
@@ -1682,7 +1690,11 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 				iput(inode);
 				return error;
 			}
-			error = 0;
+		}
+		error = shmem_acl_init(inode, dir);
+		if (error) {
+			iput(inode);
+			return error;
 		}
 		if (dir->i_mode & S_ISGID) {
 			inode->i_gid = dir->i_gid;
@@ -1897,6 +1909,53 @@ static struct inode_operations shmem_symlink_inode_operations = {
 	.put_link	= shmem_put_link,
 };
 
+#ifdef CONFIG_TMPFS_POSIX_ACL
+/**
+ * Superblocks without xattr inode operations will get security.* xattr
+ * support from the VFS "for free". As soon as we have any other xattrs
+ * like ACLs, we also need to implement the security.* handlers at
+ * filesystem level, though.
+ */
+
+static size_t shmem_xattr_security_list(struct inode *inode, char *list,
+					size_t list_len, const char *name,
+					size_t name_len)
+{
+	return security_inode_listsecurity(inode, list, list_len);
+}
+
+static int shmem_xattr_security_get(struct inode *inode, const char *name,
+				    void *buffer, size_t size)
+{
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+	return security_inode_getsecurity(inode, name, buffer, size,
+					  -EOPNOTSUPP);
+}
+
+static int shmem_xattr_security_set(struct inode *inode, const char *name,
+				    const void *value, size_t size, int flags)
+{
+	if (strcmp(name, "") == 0)
+		return -EINVAL;
+	return security_inode_setsecurity(inode, name, value, size, flags);
+}
+
+struct xattr_handler shmem_xattr_security_handler = {
+	.prefix = XATTR_SECURITY_PREFIX,
+	.list   = shmem_xattr_security_list,
+	.get    = shmem_xattr_security_get,
+	.set    = shmem_xattr_security_set,
+};
+
+static struct xattr_handler *shmem_xattr_handlers[] = {
+	&shmem_xattr_acl_access_handler,
+	&shmem_xattr_acl_default_handler,
+	&shmem_xattr_security_handler,
+	NULL
+};
+#endif
+
 static int shmem_parse_options(char *options, int *mode, uid_t *uid,
 	gid_t *gid, unsigned long *blocks, unsigned long *inodes,
 	int *policy, nodemask_t *policy_nodes)
@@ -2094,6 +2153,10 @@ static int shmem_fill_super(struct super_block *sb,
 	sb->s_magic = TMPFS_MAGIC;
 	sb->s_op = &shmem_ops;
 	sb->s_time_gran = 1;
+#ifdef CONFIG_TMPFS_POSIX_ACL
+	sb->s_xattr = shmem_xattr_handlers;
+	sb->s_flags |= MS_POSIXACL;
+#endif
 
 	inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
 	if (!inode)
@@ -2130,6 +2193,7 @@ static void shmem_destroy_inode(struct inode *inode)
 		/* only struct inode is valid if it's an inline symlink */
 		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
 	}
+	shmem_acl_destroy_inode(inode);
 	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
 }
 
@@ -2141,6 +2205,10 @@ static void init_once(void *foo, struct kmem_cache *cachep,
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 	    SLAB_CTOR_CONSTRUCTOR) {
 		inode_init_once(&p->vfs_inode);
+#ifdef CONFIG_TMPFS_POSIX_ACL
+		p->i_acl = NULL;
+		p->i_default_acl = NULL;
+#endif
 	}
 }
 
@@ -2184,6 +2252,14 @@ static struct inode_operations shmem_inode_operations = {
 	.truncate	= shmem_truncate,
 	.setattr	= shmem_notify_change,
 	.truncate_range	= shmem_truncate_range,
+#ifdef CONFIG_TMPFS_POSIX_ACL
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.listxattr	= generic_listxattr,
+	.removexattr	= generic_removexattr,
+	.permission	= shmem_permission,
+#endif
+
 };
 
 static struct inode_operations shmem_dir_inode_operations = {
@@ -2198,6 +2274,25 @@ static struct inode_operations shmem_dir_inode_operations = {
 	.mknod		= shmem_mknod,
 	.rename		= shmem_rename,
 #endif
+#ifdef CONFIG_TMPFS_POSIX_ACL
+	.setattr	= shmem_notify_change,
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.listxattr	= generic_listxattr,
+	.removexattr	= generic_removexattr,
+	.permission	= shmem_permission,
+#endif
+};
+
+static struct inode_operations shmem_special_inode_operations = {
+#ifdef CONFIG_TMPFS_POSIX_ACL
+	.setattr	= shmem_notify_change,
+	.setxattr	= generic_setxattr,
+	.getxattr	= generic_getxattr,
+	.listxattr	= generic_listxattr,
+	.removexattr	= generic_removexattr,
+	.permission	= shmem_permission,
+#endif
 };
 
 static struct super_operations shmem_ops = {
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
new file mode 100644
index 000000000000..c946bf468718
--- /dev/null
+++ b/mm/shmem_acl.c
@@ -0,0 +1,197 @@
+/*
+ * mm/shmem_acl.c
+ *
+ * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/fs.h>
+#include <linux/shmem_fs.h>
+#include <linux/xattr.h>
+#include <linux/generic_acl.h>
+
+/**
+ * shmem_get_acl  -   generic_acl_operations->getacl() operation
+ */
+static struct posix_acl *
+shmem_get_acl(struct inode *inode, int type)
+{
+	struct posix_acl *acl = NULL;
+
+	spin_lock(&inode->i_lock);
+	switch(type) {
+		case ACL_TYPE_ACCESS:
+			acl = posix_acl_dup(SHMEM_I(inode)->i_acl);
+			break;
+
+		case ACL_TYPE_DEFAULT:
+			acl = posix_acl_dup(SHMEM_I(inode)->i_default_acl);
+			break;
+	}
+	spin_unlock(&inode->i_lock);
+
+	return acl;
+}
+
+/**
+ * shmem_get_acl  -   generic_acl_operations->setacl() operation
+ */
+static void
+shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+	struct posix_acl *free = NULL;
+
+	spin_lock(&inode->i_lock);
+	switch(type) {
+		case ACL_TYPE_ACCESS:
+			free = SHMEM_I(inode)->i_acl;
+			SHMEM_I(inode)->i_acl = posix_acl_dup(acl);
+			break;
+
+		case ACL_TYPE_DEFAULT:
+			free = SHMEM_I(inode)->i_default_acl;
+			SHMEM_I(inode)->i_default_acl = posix_acl_dup(acl);
+			break;
+	}
+	spin_unlock(&inode->i_lock);
+	posix_acl_release(free);
+}
+
+struct generic_acl_operations shmem_acl_ops = {
+	.getacl = shmem_get_acl,
+	.setacl = shmem_set_acl,
+};
+
+/**
+ * shmem_list_acl_access, shmem_get_acl_access, shmem_set_acl_access,
+ * shmem_xattr_acl_access_handler  -  plumbing code to implement the
+ * system.posix_acl_access xattr using the generic acl functions.
+ */
+
+static size_t
+shmem_list_acl_access(struct inode *inode, char *list, size_t list_size,
+		      const char *name, size_t name_len)
+{
+	return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_ACCESS,
+				list, list_size);
+}
+
+static int
+shmem_get_acl_access(struct inode *inode, const char *name, void *buffer,
+		     size_t size)
+{
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, buffer,
+			       size);
+}
+
+static int
+shmem_set_acl_access(struct inode *inode, const char *name, const void *value,
+		     size_t size, int flags)
+{
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, value,
+			       size);
+}
+
+struct xattr_handler shmem_xattr_acl_access_handler = {
+	.prefix = POSIX_ACL_XATTR_ACCESS,
+	.list	= shmem_list_acl_access,
+	.get	= shmem_get_acl_access,
+	.set	= shmem_set_acl_access,
+};
+
+/**
+ * shmem_list_acl_default, shmem_get_acl_default, shmem_set_acl_default,
+ * shmem_xattr_acl_default_handler  -  plumbing code to implement the
+ * system.posix_acl_default xattr using the generic acl functions.
+ */
+
+static size_t
+shmem_list_acl_default(struct inode *inode, char *list, size_t list_size,
+		       const char *name, size_t name_len)
+{
+	return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT,
+				list, list_size);
+}
+
+static int
+shmem_get_acl_default(struct inode *inode, const char *name, void *buffer,
+		      size_t size)
+{
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, buffer,
+			       size);
+}
+
+static int
+shmem_set_acl_default(struct inode *inode, const char *name, const void *value,
+		      size_t size, int flags)
+{
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+	return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, value,
+			       size);
+}
+
+struct xattr_handler shmem_xattr_acl_default_handler = {
+	.prefix = POSIX_ACL_XATTR_DEFAULT,
+	.list	= shmem_list_acl_default,
+	.get	= shmem_get_acl_default,
+	.set	= shmem_set_acl_default,
+};
+
+/**
+ * shmem_acl_init  -  Inizialize the acl(s) of a new inode
+ */
+int
+shmem_acl_init(struct inode *inode, struct inode *dir)
+{
+	return generic_acl_init(inode, dir, &shmem_acl_ops);
+}
+
+/**
+ * shmem_acl_destroy_inode  -  destroy acls hanging off the in-memory inode
+ *
+ * This is done before destroying the actual inode.
+ */
+
+void
+shmem_acl_destroy_inode(struct inode *inode)
+{
+	if (SHMEM_I(inode)->i_acl)
+		posix_acl_release(SHMEM_I(inode)->i_acl);
+	SHMEM_I(inode)->i_acl = NULL;
+	if (SHMEM_I(inode)->i_default_acl)
+		posix_acl_release(SHMEM_I(inode)->i_default_acl);
+	SHMEM_I(inode)->i_default_acl = NULL;
+}
+
+/**
+ * shmem_check_acl  -  check_acl() callback for generic_permission()
+ */
+static int
+shmem_check_acl(struct inode *inode, int mask)
+{
+	struct posix_acl *acl = shmem_get_acl(inode, ACL_TYPE_ACCESS);
+
+	if (acl) {
+		int error = posix_acl_permission(inode, acl, mask);
+		posix_acl_release(acl);
+		return error;
+	}
+	return -EAGAIN;
+}
+
+/**
+ * shmem_permission  -  permission() inode operation
+ */
+int
+shmem_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	return generic_permission(inode, mask, shmem_check_acl);
+}
-- 
cgit v1.2.3


From 34596dc9e59d7bece16fe5aba08116b49465da26 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 30 Sep 2006 01:47:55 +0200
Subject: [PATCH] Define vsyscall cache as blob to make clearer that user space
 shouldn't use it

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/vsyscall.c |  8 ++++----
 include/linux/getcpu.h        | 12 +++++++-----
 kernel/sys.c                  |  8 ++++----
 3 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index ac48c3857ddb..07c086382059 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -155,8 +155,8 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 	   We do this here because otherwise user space would do it on
 	   its own in a likely inferior way (no access to jiffies).
 	   If you don't like it pass NULL. */
-	if (tcache && tcache->t0 == (j = __jiffies)) {
-		p = tcache->t1;
+	if (tcache && tcache->blob[0] == (j = __jiffies)) {
+		p = tcache->blob[1];
 	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
 		/* Load per CPU data from RDTSCP */
 		rdtscp(dummy, dummy, p);
@@ -165,8 +165,8 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
 	}
 	if (tcache) {
-		tcache->t0 = j;
-		tcache->t1 = p;
+		tcache->blob[0] = j;
+		tcache->blob[1] = p;
 	}
 	if (cpu)
 		*cpu = p & 0xfff;
diff --git a/include/linux/getcpu.h b/include/linux/getcpu.h
index 031ed3780e45..c7372d7a97be 100644
--- a/include/linux/getcpu.h
+++ b/include/linux/getcpu.h
@@ -1,16 +1,18 @@
 #ifndef _LINUX_GETCPU_H
 #define _LINUX_GETCPU_H 1
 
-/* Cache for getcpu() to speed it up. Results might be upto a jiffie
+/* Cache for getcpu() to speed it up. Results might be a short time
    out of date, but will be faster.
+
    User programs should not refer to the contents of this structure.
-   It is only a cache for vgetcpu(). It might change in future kernels.
+   I repeat they should not refer to it. If they do they will break
+   in future kernels.
+
+   It is only a private cache for vgetcpu(). It will change in future kernels.
    The user program must store this information per thread (__thread)
    If you want 100% accurate information pass NULL instead. */
 struct getcpu_cache {
-	unsigned long t0;
-	unsigned long t1;
-	unsigned long res[4];
+	unsigned long blob[128 / sizeof(long)];
 };
 
 #endif
diff --git a/kernel/sys.c b/kernel/sys.c
index 8647061c084a..b88806c66244 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2083,12 +2083,12 @@ asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
 		 * padding
 		 */
 		unsigned long t0, t1;
-		get_user(t0, &cache->t0);
-		get_user(t1, &cache->t1);
+		get_user(t0, &cache->blob[0]);
+		get_user(t1, &cache->blob[1]);
 		t0++;
 		t1++;
-		put_user(t0, &cache->t0);
-		put_user(t1, &cache->t1);
+		put_user(t0, &cache->blob[0]);
+		put_user(t1, &cache->blob[1]);
 	}
 	return err ? -EFAULT : 0;
 }
-- 
cgit v1.2.3


From 95d4e6be25a68cd9fbe8c0d356b585504d8db1c7 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 29 Sep 2006 17:05:05 -0700
Subject: [NetLabel]: audit fixups due to delayed feedback

Fix some issues Steve Grubb had with the way NetLabel was using the audit
subsystem.  This should make NetLabel more consistent with other kernel
generated audit messages specifying configuration changes.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Acked-by: Steve Grubb <sgrubb@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/audit.h              | 11 +++--
 include/net/cipso_ipv4.h           |  4 +-
 include/net/netlabel.h             |  8 +++-
 net/ipv4/cipso_ipv4.c              |  4 +-
 net/netlabel/netlabel_cipso_v4.c   | 48 +++++++++++++---------
 net/netlabel/netlabel_domainhash.c | 82 ++++++++++++++++++++------------------
 net/netlabel/netlabel_domainhash.h |  8 ++--
 net/netlabel/netlabel_mgmt.c       | 27 +++++++++----
 net/netlabel/netlabel_unlabeled.c  | 34 +++++++++++-----
 net/netlabel/netlabel_user.c       | 66 ++++--------------------------
 net/netlabel/netlabel_user.h       | 16 +++++++-
 11 files changed, 157 insertions(+), 151 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 42719d07612a..c3aa09751814 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -95,12 +95,11 @@
 #define AUDIT_MAC_POLICY_LOAD	1403	/* Policy file load */
 #define AUDIT_MAC_STATUS	1404	/* Changed enforcing,permissive,off */
 #define AUDIT_MAC_CONFIG_CHANGE	1405	/* Changes to booleans */
-#define AUDIT_MAC_UNLBL_ACCEPT	1406	/* NetLabel: allow unlabeled traffic */
-#define AUDIT_MAC_UNLBL_DENY	1407	/* NetLabel: deny unlabeled traffic */
-#define AUDIT_MAC_CIPSOV4_ADD	1408	/* NetLabel: add CIPSOv4 DOI entry */
-#define AUDIT_MAC_CIPSOV4_DEL	1409	/* NetLabel: del CIPSOv4 DOI entry */
-#define AUDIT_MAC_MAP_ADD	1410	/* NetLabel: add LSM domain mapping */
-#define AUDIT_MAC_MAP_DEL	1411	/* NetLabel: del LSM domain mapping */
+#define AUDIT_MAC_UNLBL_ALLOW	1406	/* NetLabel: allow unlabeled traffic */
+#define AUDIT_MAC_CIPSOV4_ADD	1407	/* NetLabel: add CIPSOv4 DOI entry */
+#define AUDIT_MAC_CIPSOV4_DEL	1408	/* NetLabel: del CIPSOv4 DOI entry */
+#define AUDIT_MAC_MAP_ADD	1409	/* NetLabel: add LSM domain mapping */
+#define AUDIT_MAC_MAP_DEL	1410	/* NetLabel: del LSM domain mapping */
 
 #define AUDIT_FIRST_KERN_ANOM_MSG   1700
 #define AUDIT_LAST_KERN_ANOM_MSG    1799
diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 5d6ae1b2b196..718b4d9c891f 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -129,7 +129,7 @@ extern int cipso_v4_rbm_strictvalid;
 #ifdef CONFIG_NETLABEL
 int cipso_v4_doi_add(struct cipso_v4_doi *doi_def);
 int cipso_v4_doi_remove(u32 doi,
-			u32 audit_secid,
+			struct netlbl_audit *audit_info,
 			void (*callback) (struct rcu_head * head));
 struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi);
 int cipso_v4_doi_walk(u32 *skip_cnt,
@@ -145,7 +145,7 @@ static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def)
 }
 
 static inline int cipso_v4_doi_remove(u32 doi,
-				    u32 audit_secid,
+				    struct netlbl_audit *audit_info,
 				    void (*callback) (struct rcu_head * head))
 {
 	return 0;
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 190bfdbbdba6..c63a58058e21 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -92,11 +92,17 @@
  *
  */
 
+/* NetLabel audit information */
+struct netlbl_audit {
+	u32 secid;
+	uid_t loginuid;
+};
+
 /* Domain mapping definition struct */
 struct netlbl_dom_map;
 
 /* Domain mapping operations */
-int netlbl_domhsh_remove(const char *domain, u32 audit_secid);
+int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info);
 
 /* LSM security attributes */
 struct netlbl_lsm_cache {
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index c4e469ff842d..a8e2e879a647 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -485,7 +485,7 @@ doi_add_failure_rlock:
  *
  */
 int cipso_v4_doi_remove(u32 doi,
-			u32 audit_secid,
+			struct netlbl_audit *audit_info,
 			void (*callback) (struct rcu_head * head))
 {
 	struct cipso_v4_doi *doi_def;
@@ -506,7 +506,7 @@ int cipso_v4_doi_remove(u32 doi,
 		list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list)
 			if (dom_iter->valid)
 				netlbl_domhsh_remove(dom_iter->domain,
-						     audit_secid);
+						     audit_info);
 		cipso_v4_cache_invalidate();
 		rcu_read_unlock();
 
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 09986ca962a6..a6ce1d6d5c59 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -384,11 +384,15 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
 	u32 doi;
 	const char *type_str = "(unknown)";
 	struct audit_buffer *audit_buf;
+	struct netlbl_audit audit_info;
 
 	if (!info->attrs[NLBL_CIPSOV4_A_DOI] ||
 	    !info->attrs[NLBL_CIPSOV4_A_MTYPE])
 		return -EINVAL;
 
+	doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
+	netlbl_netlink_auditinfo(skb, &audit_info);
+
 	type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]);
 	switch (type) {
 	case CIPSO_V4_MAP_STD:
@@ -401,13 +405,14 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
 		break;
 	}
 
-	if (ret_val == 0) {
-		doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
-		audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
-						      NETLINK_CB(skb).sid);
-		audit_log_format(audit_buf, " doi=%u type=%s", doi, type_str);
-		audit_log_end(audit_buf);
-	}
+	audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD,
+					      &audit_info);
+	audit_log_format(audit_buf,
+			 " cipso_doi=%u cipso_type=%s res=%u",
+			 doi,
+			 type_str,
+			 ret_val == 0 ? 1 : 0);
+	audit_log_end(audit_buf);
 
 	return ret_val;
 }
@@ -668,20 +673,25 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
 	int ret_val = -EINVAL;
 	u32 doi = 0;
 	struct audit_buffer *audit_buf;
+	struct netlbl_audit audit_info;
 
-	if (info->attrs[NLBL_CIPSOV4_A_DOI]) {
-		doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
-		ret_val = cipso_v4_doi_remove(doi,
-					      NETLINK_CB(skb).sid,
-					      netlbl_cipsov4_doi_free);
-	}
+	if (!info->attrs[NLBL_CIPSOV4_A_DOI])
+		return -EINVAL;
 
-	if (ret_val == 0) {
-		audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL,
-						      NETLINK_CB(skb).sid);
-		audit_log_format(audit_buf, " doi=%u", doi);
-		audit_log_end(audit_buf);
-	}
+	doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
+	netlbl_netlink_auditinfo(skb, &audit_info);
+
+	ret_val = cipso_v4_doi_remove(doi,
+				      &audit_info,
+				      netlbl_cipsov4_doi_free);
+
+	audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL,
+					      &audit_info);
+	audit_log_format(audit_buf,
+			 " cipso_doi=%u res=%u",
+			 doi,
+			 ret_val == 0 ? 1 : 0);
+	audit_log_end(audit_buf);
 
 	return ret_val;
 }
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index d64e2ae3b129..af4371d3b459 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -188,7 +188,7 @@ int netlbl_domhsh_init(u32 size)
 /**
  * netlbl_domhsh_add - Adds a entry to the domain hash table
  * @entry: the entry to add
- * @audit_secid: the LSM secid to use in the audit message
+ * @audit_info: NetLabel audit information
  *
  * Description:
  * Adds a new entry to the domain hash table and handles any updates to the
@@ -196,7 +196,8 @@ int netlbl_domhsh_init(u32 size)
  * negative on failure.
  *
  */
-int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid)
+int netlbl_domhsh_add(struct netlbl_dom_map *entry,
+		      struct netlbl_audit *audit_info)
 {
 	int ret_val;
 	u32 bkt;
@@ -241,26 +242,26 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid)
 		spin_unlock(&netlbl_domhsh_def_lock);
 	} else
 		ret_val = -EINVAL;
-	if (ret_val == 0) {
-		if (entry->domain != NULL)
-			audit_domain = entry->domain;
-		else
-			audit_domain = "(default)";
-		audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD,
-						      audit_secid);
-		audit_log_format(audit_buf, " domain=%s", audit_domain);
-		switch (entry->type) {
-		case NETLBL_NLTYPE_UNLABELED:
-			audit_log_format(audit_buf, " protocol=unlbl");
-			break;
-		case NETLBL_NLTYPE_CIPSOV4:
-			audit_log_format(audit_buf,
-					 " protocol=cipsov4 doi=%u",
-					 entry->type_def.cipsov4->doi);
-			break;
-		}
-		audit_log_end(audit_buf);
+
+	if (entry->domain != NULL)
+		audit_domain = entry->domain;
+	else
+		audit_domain = "(default)";
+	audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info);
+	audit_log_format(audit_buf, " nlbl_domain=%s", audit_domain);
+	switch (entry->type) {
+	case NETLBL_NLTYPE_UNLABELED:
+		audit_log_format(audit_buf, " nlbl_protocol=unlbl");
+		break;
+	case NETLBL_NLTYPE_CIPSOV4:
+		audit_log_format(audit_buf,
+				 " nlbl_protocol=cipsov4 cipso_doi=%u",
+				 entry->type_def.cipsov4->doi);
+		break;
 	}
+	audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0);
+	audit_log_end(audit_buf);
+
 	rcu_read_unlock();
 
 	if (ret_val != 0) {
@@ -279,7 +280,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid)
 /**
  * netlbl_domhsh_add_default - Adds the default entry to the domain hash table
  * @entry: the entry to add
- * @audit_secid: the LSM secid to use in the audit message
+ * @audit_info: NetLabel audit information
  *
  * Description:
  * Adds a new default entry to the domain hash table and handles any updates
@@ -287,15 +288,16 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid)
  * negative on failure.
  *
  */
-int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid)
+int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
+			      struct netlbl_audit *audit_info)
 {
-	return netlbl_domhsh_add(entry, audit_secid);
+	return netlbl_domhsh_add(entry, audit_info);
 }
 
 /**
  * netlbl_domhsh_remove - Removes an entry from the domain hash table
  * @domain: the domain to remove
- * @audit_secid: the LSM secid to use in the audit message
+ * @audit_info: NetLabel audit information
  *
  * Description:
  * Removes an entry from the domain hash table and handles any updates to the
@@ -303,7 +305,7 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid)
  * negative on failure.
  *
  */
-int netlbl_domhsh_remove(const char *domain, u32 audit_secid)
+int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info)
 {
 	int ret_val = -ENOENT;
 	struct netlbl_dom_map *entry;
@@ -345,18 +347,20 @@ int netlbl_domhsh_remove(const char *domain, u32 audit_secid)
 			ret_val = -ENOENT;
 		spin_unlock(&netlbl_domhsh_def_lock);
 	}
-	if (ret_val == 0) {
-		if (entry->domain != NULL)
-			audit_domain = entry->domain;
-		else
-			audit_domain = "(default)";
-		audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL,
-						      audit_secid);
-		audit_log_format(audit_buf, " domain=%s", audit_domain);
-		audit_log_end(audit_buf);
 
+	if (entry->domain != NULL)
+		audit_domain = entry->domain;
+	else
+		audit_domain = "(default)";
+	audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info);
+	audit_log_format(audit_buf,
+			 " nlbl_domain=%s res=%u",
+			 audit_domain,
+			 ret_val == 0 ? 1 : 0);
+	audit_log_end(audit_buf);
+
+	if (ret_val == 0)
 		call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
-	}
 
 remove_return:
 	rcu_read_unlock();
@@ -365,7 +369,7 @@ remove_return:
 
 /**
  * netlbl_domhsh_remove_default - Removes the default entry from the table
- * @audit_secid: the LSM secid to use in the audit message
+ * @audit_info: NetLabel audit information
  *
  * Description:
  * Removes/resets the default entry for the domain hash table and handles any
@@ -373,9 +377,9 @@ remove_return:
  * success, non-zero on failure.
  *
  */
-int netlbl_domhsh_remove_default(u32 audit_secid)
+int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info)
 {
-	return netlbl_domhsh_remove(NULL, audit_secid);
+	return netlbl_domhsh_remove(NULL, audit_info);
 }
 
 /**
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index d50f13cacdca..3689956c3436 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -57,9 +57,11 @@ struct netlbl_dom_map {
 int netlbl_domhsh_init(u32 size);
 
 /* Manipulate the domain hash table */
-int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid);
-int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid);
-int netlbl_domhsh_remove_default(u32 audit_secid);
+int netlbl_domhsh_add(struct netlbl_dom_map *entry,
+		      struct netlbl_audit *audit_info);
+int netlbl_domhsh_add_default(struct netlbl_dom_map *entry,
+			      struct netlbl_audit *audit_info);
+int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info);
 struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
 int netlbl_domhsh_walk(u32 *skip_bkt,
 		     u32 *skip_chain,
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 0ac314f18ad1..53c9079ad2c3 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -87,11 +87,14 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
 	struct netlbl_dom_map *entry = NULL;
 	size_t tmp_size;
 	u32 tmp_val;
+	struct netlbl_audit audit_info;
 
 	if (!info->attrs[NLBL_MGMT_A_DOMAIN] ||
 	    !info->attrs[NLBL_MGMT_A_PROTOCOL])
 		goto add_failure;
 
+	netlbl_netlink_auditinfo(skb, &audit_info);
+
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (entry == NULL) {
 		ret_val = -ENOMEM;
@@ -108,7 +111,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
 
 	switch (entry->type) {
 	case NETLBL_NLTYPE_UNLABELED:
-		ret_val = netlbl_domhsh_add(entry, NETLINK_CB(skb).sid);
+		ret_val = netlbl_domhsh_add(entry, &audit_info);
 		break;
 	case NETLBL_NLTYPE_CIPSOV4:
 		if (!info->attrs[NLBL_MGMT_A_CV4DOI])
@@ -125,7 +128,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
 			rcu_read_unlock();
 			goto add_failure;
 		}
-		ret_val = netlbl_domhsh_add(entry, NETLINK_CB(skb).sid);
+		ret_val = netlbl_domhsh_add(entry, &audit_info);
 		rcu_read_unlock();
 		break;
 	default:
@@ -156,12 +159,15 @@ add_failure:
 static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info)
 {
 	char *domain;
+	struct netlbl_audit audit_info;
 
 	if (!info->attrs[NLBL_MGMT_A_DOMAIN])
 		return -EINVAL;
 
+	netlbl_netlink_auditinfo(skb, &audit_info);
+
 	domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]);
-	return netlbl_domhsh_remove(domain, NETLINK_CB(skb).sid);
+	return netlbl_domhsh_remove(domain, &audit_info);
 }
 
 /**
@@ -264,10 +270,13 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
 	int ret_val = -EINVAL;
 	struct netlbl_dom_map *entry = NULL;
 	u32 tmp_val;
+	struct netlbl_audit audit_info;
 
 	if (!info->attrs[NLBL_MGMT_A_PROTOCOL])
 		goto adddef_failure;
 
+	netlbl_netlink_auditinfo(skb, &audit_info);
+
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (entry == NULL) {
 		ret_val = -ENOMEM;
@@ -277,8 +286,7 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
 
 	switch (entry->type) {
 	case NETLBL_NLTYPE_UNLABELED:
-		ret_val = netlbl_domhsh_add_default(entry,
-						    NETLINK_CB(skb).sid);
+		ret_val = netlbl_domhsh_add_default(entry, &audit_info);
 		break;
 	case NETLBL_NLTYPE_CIPSOV4:
 		if (!info->attrs[NLBL_MGMT_A_CV4DOI])
@@ -295,8 +303,7 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
 			rcu_read_unlock();
 			goto adddef_failure;
 		}
-		ret_val = netlbl_domhsh_add_default(entry,
-						    NETLINK_CB(skb).sid);
+		ret_val = netlbl_domhsh_add_default(entry, &audit_info);
 		rcu_read_unlock();
 		break;
 	default:
@@ -324,7 +331,11 @@ adddef_failure:
  */
 static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info)
 {
-	return netlbl_domhsh_remove_default(NETLINK_CB(skb).sid);
+	struct netlbl_audit audit_info;
+
+	netlbl_netlink_auditinfo(skb, &audit_info);
+
+	return netlbl_domhsh_remove_default(&audit_info);
 }
 
 /**
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index ab36675fee8c..1833ad233b39 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -70,18 +70,25 @@ static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = {
 /**
  * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag
  * @value: desired value
- * @audit_secid: the LSM secid to use in the audit message
+ * @audit_info: NetLabel audit information
  *
  * Description:
  * Set the value of the unlabeled accept flag to @value.
  *
  */
-static void netlbl_unlabel_acceptflg_set(u8 value, u32 audit_secid)
+static void netlbl_unlabel_acceptflg_set(u8 value,
+					 struct netlbl_audit *audit_info)
 {
+	struct audit_buffer *audit_buf;
+	u8 old_val;
+
+	old_val = atomic_read(&netlabel_unlabel_accept_flg);
 	atomic_set(&netlabel_unlabel_accept_flg, value);
-	netlbl_audit_nomsg((value ?
-			    AUDIT_MAC_UNLBL_ACCEPT : AUDIT_MAC_UNLBL_DENY),
-			   audit_secid);
+
+	audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW,
+					      audit_info);
+	audit_log_format(audit_buf, " unlbl_accept=%u old=%u", value, old_val);
+	audit_log_end(audit_buf);
 }
 
 /*
@@ -101,12 +108,13 @@ static void netlbl_unlabel_acceptflg_set(u8 value, u32 audit_secid)
 static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info)
 {
 	u8 value;
+	struct netlbl_audit audit_info;
 
 	if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) {
 		value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]);
 		if (value == 1 || value == 0) {
-			netlbl_unlabel_acceptflg_set(value,
-						     NETLINK_CB(skb).sid);
+			netlbl_netlink_auditinfo(skb, &audit_info);
+			netlbl_unlabel_acceptflg_set(value, &audit_info);
 			return 0;
 		}
 	}
@@ -250,19 +258,23 @@ int netlbl_unlabel_defconf(void)
 {
 	int ret_val;
 	struct netlbl_dom_map *entry;
-	u32 secid;
+	struct netlbl_audit audit_info;
 
-	security_task_getsecid(current, &secid);
+	/* Only the kernel is allowed to call this function and the only time
+	 * it is called is at bootup before the audit subsystem is reporting
+	 * messages so don't worry to much about these values. */
+	security_task_getsecid(current, &audit_info.secid);
+	audit_info.loginuid = 0;
 
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (entry == NULL)
 		return -ENOMEM;
 	entry->type = NETLBL_NLTYPE_UNLABELED;
-	ret_val = netlbl_domhsh_add_default(entry, secid);
+	ret_val = netlbl_domhsh_add_default(entry, &audit_info);
 	if (ret_val != 0)
 		return ret_val;
 
-	netlbl_unlabel_acceptflg_set(1, secid);
+	netlbl_unlabel_acceptflg_set(1, &audit_info);
 
 	return 0;
 }
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index c2343af584cb..98a416381e61 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -85,7 +85,7 @@ int netlbl_netlink_init(void)
 /**
  * netlbl_audit_start_common - Start an audit message
  * @type: audit message type
- * @secid: LSM context ID
+ * @audit_info: NetLabel audit information
  *
  * Description:
  * Start an audit message using the type specified in @type and fill the audit
@@ -93,14 +93,11 @@ int netlbl_netlink_init(void)
  * a pointer to the audit buffer on success, NULL on failure.
  *
  */
-struct audit_buffer *netlbl_audit_start_common(int type, u32 secid)
+struct audit_buffer *netlbl_audit_start_common(int type,
+					       struct netlbl_audit *audit_info)
 {
 	struct audit_context *audit_ctx = current->audit_context;
 	struct audit_buffer *audit_buf;
-	uid_t audit_loginuid;
-	const char *audit_tty;
-	char audit_comm[sizeof(current->comm)];
-	struct vm_area_struct *vma;
 	char *secctx;
 	u32 secctx_len;
 
@@ -108,60 +105,13 @@ struct audit_buffer *netlbl_audit_start_common(int type, u32 secid)
 	if (audit_buf == NULL)
 		return NULL;
 
-	audit_loginuid = audit_get_loginuid(audit_ctx);
-	if (current->signal &&
-	    current->signal->tty &&
-	    current->signal->tty->name)
-		audit_tty = current->signal->tty->name;
-	else
-		audit_tty = "(none)";
-	get_task_comm(audit_comm, current);
+	audit_log_format(audit_buf, "netlabel: auid=%u", audit_info->loginuid);
 
-	audit_log_format(audit_buf,
-			 "netlabel: auid=%u uid=%u tty=%s pid=%d",
-			 audit_loginuid,
-			 current->uid,
-			 audit_tty,
-			 current->pid);
-	audit_log_format(audit_buf, " comm=");
-	audit_log_untrustedstring(audit_buf, audit_comm);
-	if (current->mm) {
-		down_read(&current->mm->mmap_sem);
-		vma = current->mm->mmap;
-		while (vma) {
-			if ((vma->vm_flags & VM_EXECUTABLE) &&
-			    vma->vm_file) {
-				audit_log_d_path(audit_buf,
-						 " exe=",
-						 vma->vm_file->f_dentry,
-						 vma->vm_file->f_vfsmnt);
-				break;
-			}
-			vma = vma->vm_next;
-		}
-		up_read(&current->mm->mmap_sem);
-	}
-
-	if (secid != 0 &&
-	    security_secid_to_secctx(secid, &secctx, &secctx_len) == 0)
+	if (audit_info->secid != 0 &&
+	    security_secid_to_secctx(audit_info->secid,
+				     &secctx,
+				     &secctx_len) == 0)
 		audit_log_format(audit_buf, " subj=%s", secctx);
 
 	return audit_buf;
 }
-
-/**
- * netlbl_audit_nomsg - Send an audit message without additional text
- * @type: audit message type
- * @secid: LSM context ID
- *
- * Description:
- * Send an audit message with only the common NetLabel audit fields.
- *
- */
-void netlbl_audit_nomsg(int type, u32 secid)
-{
-	struct audit_buffer *audit_buf;
-
-	audit_buf = netlbl_audit_start_common(type, secid);
-	audit_log_end(audit_buf);
-}
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index ab840acfc964..47967ef32964 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -72,13 +72,25 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb,
 			   NETLBL_PROTO_VERSION);
 }
 
+/**
+ * netlbl_netlink_auditinfo - Fetch the audit information from a NETLINK msg
+ * @skb: the packet
+ * @audit_info: NetLabel audit information
+ */
+static inline void netlbl_netlink_auditinfo(struct sk_buff *skb,
+					    struct netlbl_audit *audit_info)
+{
+	audit_info->secid = NETLINK_CB(skb).sid;
+	audit_info->loginuid = NETLINK_CB(skb).loginuid;
+}
+
 /* NetLabel NETLINK I/O functions */
 
 int netlbl_netlink_init(void);
 
 /* NetLabel Audit Functions */
 
-struct audit_buffer *netlbl_audit_start_common(int type, u32 secid);
-void netlbl_audit_nomsg(int type, u32 secid);
+struct audit_buffer *netlbl_audit_start_common(int type,
+					      struct netlbl_audit *audit_info);
 
 #endif
-- 
cgit v1.2.3


From f9317a40c4e09e20ef01601fc9f5de9e6acb5b96 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Fri, 29 Sep 2006 17:06:23 -0700
Subject: [BNX2]: Disable MSI on 5706 if AMD 8132 bridge is present.

MSI is defined to be 32-bit write.  The 5706 does 64-bit MSI writes
with byte enables disabled on the unused 32-bit word.  This is legal
but causes problems on the AMD 8132 which will eventually stop
responding after a while.

Without this patch, the MSI test done by the driver during open will
pass, but MSI will eventually stop working after a few MSIs are
written by the device.

AMD believes this incompatibility is unique to the 5706, and
prefers to locally disable MSI rather than globally disabling it
using pci_msi_quirk.

Update version to 1.4.45.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c      | 32 ++++++++++++++++++++++++++++++--
 include/linux/pci_ids.h |  1 +
 2 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 7fcf015021ec..6b4edb63c4c4 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -56,8 +56,8 @@
 
 #define DRV_MODULE_NAME		"bnx2"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"1.4.44"
-#define DRV_MODULE_RELDATE	"August 10, 2006"
+#define DRV_MODULE_VERSION	"1.4.45"
+#define DRV_MODULE_RELDATE	"September 29, 2006"
 
 #define RUN_AT(x) (jiffies + (x))
 
@@ -5805,6 +5805,34 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
 		bp->cmd_ticks_int = bp->cmd_ticks;
 	}
 
+	/* Disable MSI on 5706 if AMD 8132 bridge is found.
+	 *
+	 * MSI is defined to be 32-bit write.  The 5706 does 64-bit MSI writes
+	 * with byte enables disabled on the unused 32-bit word.  This is legal
+	 * but causes problems on the AMD 8132 which will eventually stop
+	 * responding after a while.
+	 *
+	 * AMD believes this incompatibility is unique to the 5706, and
+	 * prefers to locally disable MSI rather than globally disabling it
+	 * using pci_msi_quirk.
+	 */
+	if (CHIP_NUM(bp) == CHIP_NUM_5706 && disable_msi == 0) {
+		struct pci_dev *amd_8132 = NULL;
+
+		while ((amd_8132 = pci_get_device(PCI_VENDOR_ID_AMD,
+						  PCI_DEVICE_ID_AMD_8132_BRIDGE,
+						  amd_8132))) {
+			u8 rev;
+
+			pci_read_config_byte(amd_8132, PCI_REVISION_ID, &rev);
+			if (rev >= 0x10 && rev <= 0x13) {
+				disable_msi = 1;
+				pci_dev_put(amd_8132);
+				break;
+			}
+		}
+	}
+
 	bp->autoneg = AUTONEG_SPEED | AUTONEG_FLOW_CTRL;
 	bp->req_line_speed = 0;
 	if (bp->phy_flags & PHY_SERDES_FLAG) {
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index b7e85ff045ea..c9ffbc3843d5 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -507,6 +507,7 @@
 #define PCI_DEVICE_ID_AMD_8151_0	0x7454
 #define PCI_DEVICE_ID_AMD_8131_BRIDGE	0x7450
 #define PCI_DEVICE_ID_AMD_8131_APIC	0x7451
+#define PCI_DEVICE_ID_AMD_8132_BRIDGE	0x7458
 #define PCI_DEVICE_ID_AMD_CS5536_ISA    0x2090
 #define PCI_DEVICE_ID_AMD_CS5536_FLASH  0x2091
 #define PCI_DEVICE_ID_AMD_CS5536_AUDIO  0x2093
-- 
cgit v1.2.3


From 1c9d3e72a7164c590437f2ab6c2c4f6da91f1703 Mon Sep 17 00:00:00 2001
From: Chas Williams <chas@cmf.nrl.navy.mil>
Date: Fri, 29 Sep 2006 17:13:24 -0700
Subject: [ATM]: [lec] header indent, comment and whitespace cleanup

Signed-off-by: Chas Williams <chas@cmf.nrl.navy.mil>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atmlec.h | 119 ++++++++++++++++++----------------
 net/atm/lec.h          | 172 +++++++++++++++++++++++++++----------------------
 net/atm/lec_arpc.h     | 148 ++++++++++++++++++++++--------------------
 3 files changed, 235 insertions(+), 204 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/atmlec.h b/include/linux/atmlec.h
index f267f2442766..6f5a1bab8f50 100644
--- a/include/linux/atmlec.h
+++ b/include/linux/atmlec.h
@@ -1,9 +1,7 @@
 /*
- * 
- * ATM Lan Emulation Daemon vs. driver interface
- *
- * mkiiskila@yahoo.com
+ * ATM Lan Emulation Daemon driver interface
  *
+ * Marko Kiiskila <mkiiskila@yahoo.com>
  */
 
 #ifndef _ATMLEC_H_
@@ -13,76 +11,87 @@
 #include <linux/atmioc.h>
 #include <linux/atm.h>
 #include <linux/if_ether.h>
+
 /* ATM lec daemon control socket */
-#define ATMLEC_CTRL _IO('a',ATMIOC_LANE)
-#define ATMLEC_DATA _IO('a',ATMIOC_LANE+1)
-#define ATMLEC_MCAST _IO('a',ATMIOC_LANE+2)
+#define ATMLEC_CTRL	_IO('a', ATMIOC_LANE)
+#define ATMLEC_DATA	_IO('a', ATMIOC_LANE+1)
+#define ATMLEC_MCAST	_IO('a', ATMIOC_LANE+2)
 
 /* Maximum number of LEC interfaces (tweakable) */
 #define MAX_LEC_ITF 48
 
-/* From the total of MAX_LEC_ITF, last NUM_TR_DEVS are reserved for Token Ring.
+/*
+ * From the total of MAX_LEC_ITF, last NUM_TR_DEVS are reserved for Token Ring.
  * E.g. if MAX_LEC_ITF = 48 and NUM_TR_DEVS = 8, then lec0-lec39 are for
  * Ethernet ELANs and lec40-lec47 are for Token Ring ELANS.
  */
 #define NUM_TR_DEVS 8
 
-typedef enum { 
-        l_set_mac_addr,   l_del_mac_addr, 
-        l_svc_setup, 
-        l_addr_delete,    l_topology_change, 
-        l_flush_complete, l_arp_update,
-        l_narp_req, /* LANE2 mandates the use of this */
-        l_config,         l_flush_tran_id, 
-        l_set_lecid,      l_arp_xmt,
-        l_rdesc_arp_xmt,
-        l_associate_req,
-        l_should_bridge   /* should we bridge this MAC? */
+typedef enum {
+	l_set_mac_addr,
+	l_del_mac_addr,
+	l_svc_setup,
+	l_addr_delete,
+	l_topology_change,
+	l_flush_complete,
+	l_arp_update,
+	l_narp_req,		/* LANE2 mandates the use of this */
+	l_config,
+	l_flush_tran_id,
+	l_set_lecid,
+	l_arp_xmt,
+	l_rdesc_arp_xmt,
+	l_associate_req,
+	l_should_bridge		/* should we bridge this MAC? */
 } atmlec_msg_type;
 
 #define ATMLEC_MSG_TYPE_MAX l_should_bridge
 
 struct atmlec_config_msg {
-        unsigned int maximum_unknown_frame_count;
-        unsigned int max_unknown_frame_time;
-        unsigned short max_retry_count;
-        unsigned int aging_time;
-        unsigned int forward_delay_time;
-        unsigned int arp_response_time;
-        unsigned int flush_timeout;
-        unsigned int path_switching_delay;
-        unsigned int  lane_version; /* LANE2: 1 for LANEv1, 2 for LANEv2 */
-        int mtu;
-        int is_proxy;
+	unsigned int maximum_unknown_frame_count;
+	unsigned int max_unknown_frame_time;
+	unsigned short max_retry_count;
+	unsigned int aging_time;
+	unsigned int forward_delay_time;
+	unsigned int arp_response_time;
+	unsigned int flush_timeout;
+	unsigned int path_switching_delay;
+	unsigned int lane_version;	/* LANE2: 1 for LANEv1, 2 for LANEv2 */
+	int mtu;
+	int is_proxy;
 };
- 
+
 struct atmlec_msg {
-        atmlec_msg_type type;
-        int             sizeoftlvs;        /* LANE2: if != 0, tlvs follow */ 
-        union {
-                struct {
-                        unsigned char mac_addr[ETH_ALEN];
-                        unsigned char atm_addr[ATM_ESA_LEN];
-                        unsigned int flag;/* Topology_change flag, 
-                                              remoteflag, permanent flag,
-                                              lecid, transaction id */
-                        unsigned int targetless_le_arp; /* LANE2 */
-                        unsigned int no_source_le_narp; /* LANE2 */
-                } normal;
-                struct atmlec_config_msg config;
-                struct {
-                        uint16_t lec_id;                     /* requestor lec_id  */
-                        uint32_t tran_id;                    /* transaction id    */
-                        unsigned char mac_addr[ETH_ALEN];    /* dst mac addr      */
-                        unsigned char atm_addr[ATM_ESA_LEN]; /* reqestor ATM addr */
-                } proxy;
-		     /* For mapping LE_ARP requests to responses. Filled by */
-        } content;       /* zeppelin, returned by kernel. Used only when proxying */ 
+	atmlec_msg_type type;
+	int sizeoftlvs;		/* LANE2: if != 0, tlvs follow */
+	union {
+		struct {
+			unsigned char mac_addr[ETH_ALEN];
+			unsigned char atm_addr[ATM_ESA_LEN];
+			unsigned int flag;	/*
+						 * Topology_change flag,
+						 * remoteflag, permanent flag,
+						 * lecid, transaction id
+						 */
+			unsigned int targetless_le_arp;	/* LANE2 */
+			unsigned int no_source_le_narp;	/* LANE2 */
+		} normal;
+		struct atmlec_config_msg config;
+		struct {
+			uint16_t lec_id;			/* requestor lec_id  */
+			uint32_t tran_id;			/* transaction id    */
+			unsigned char mac_addr[ETH_ALEN];	/* dst mac addr      */
+			unsigned char atm_addr[ATM_ESA_LEN];	/* reqestor ATM addr */
+		} proxy;	/*
+				 * For mapping LE_ARP requests to responses. Filled by
+				 * zeppelin, returned by kernel. Used only when proxying
+				 */
+	} content;
 } __ATM_API_ALIGN;
 
 struct atmlec_ioc {
-        int dev_num;
-        unsigned char atm_addr[ATM_ESA_LEN];
-        unsigned char receive;    /* 1= receive vcc, 0 = send vcc */
+	int dev_num;
+	unsigned char atm_addr[ATM_ESA_LEN];
+	unsigned char receive;	/* 1= receive vcc, 0 = send vcc */
 };
 #endif /* _ATMLEC_H_ */
diff --git a/net/atm/lec.h b/net/atm/lec.h
index c22a8bfa1f81..c700472f64df 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -1,14 +1,13 @@
 /*
- *
  * Lan Emulation client header file
  *
- * Marko Kiiskila mkiiskila@yahoo.com
- *
+ * Marko Kiiskila <mkiiskila@yahoo.com>
  */
 
 #ifndef _LEC_H_
 #define _LEC_H_
 
+#include <linux/config.h>
 #include <linux/atmdev.h>
 #include <linux/netdevice.h>
 #include <linux/atmlec.h>
@@ -16,18 +15,18 @@
 #define LEC_HEADER_LEN 16
 
 struct lecdatahdr_8023 {
-  unsigned short le_header;
-  unsigned char h_dest[ETH_ALEN];
-  unsigned char h_source[ETH_ALEN];
-  unsigned short h_type;
+	unsigned short le_header;
+	unsigned char h_dest[ETH_ALEN];
+	unsigned char h_source[ETH_ALEN];
+	unsigned short h_type;
 };
 
 struct lecdatahdr_8025 {
-  unsigned short le_header;
-  unsigned char ac_pad;
-  unsigned char fc;
-  unsigned char h_dest[ETH_ALEN];
-  unsigned char h_source[ETH_ALEN];
+	unsigned short le_header;
+	unsigned char ac_pad;
+	unsigned char fc;
+	unsigned char h_dest[ETH_ALEN];
+	unsigned char h_source[ETH_ALEN];
 };
 
 #define LEC_MINIMUM_8023_SIZE   62
@@ -44,17 +43,18 @@ struct lecdatahdr_8025 {
  *
  */
 struct lane2_ops {
-	int  (*resolve)(struct net_device *dev, u8 *dst_mac, int force,
-                        u8 **tlvs, u32 *sizeoftlvs);
-        int  (*associate_req)(struct net_device *dev, u8 *lan_dst,
-                              u8 *tlvs, u32 sizeoftlvs);
-	void (*associate_indicator)(struct net_device *dev, u8 *mac_addr,
-                                    u8 *tlvs, u32 sizeoftlvs);
+	int (*resolve) (struct net_device *dev, u8 *dst_mac, int force,
+			u8 **tlvs, u32 *sizeoftlvs);
+	int (*associate_req) (struct net_device *dev, u8 *lan_dst,
+			      u8 *tlvs, u32 sizeoftlvs);
+	void (*associate_indicator) (struct net_device *dev, u8 *mac_addr,
+				     u8 *tlvs, u32 sizeoftlvs);
 };
 
 /*
  * ATM LAN Emulation supports both LLC & Dix Ethernet EtherType
  * frames. 
+ *
  * 1. Dix Ethernet EtherType frames encoded by placing EtherType
  *    field in h_type field. Data follows immediatelly after header.
  * 2. LLC Data frames whose total length, including LLC field and data,
@@ -70,72 +70,88 @@ struct lane2_ops {
 #define LEC_ARP_TABLE_SIZE 16
 
 struct lec_priv {
-        struct net_device_stats stats;
-        unsigned short lecid;      /* Lecid of this client */
-        struct lec_arp_table *lec_arp_empty_ones;
-        /* Used for storing VCC's that don't have a MAC address attached yet */
-        struct lec_arp_table *lec_arp_tables[LEC_ARP_TABLE_SIZE];
-        /* Actual LE ARP table */
-        struct lec_arp_table *lec_no_forward;
-        /* Used for storing VCC's (and forward packets from) which are to
-           age out by not using them to forward packets. 
-           This is because to some LE clients there will be 2 VCCs. Only
-           one of them gets used. */
-        struct lec_arp_table *mcast_fwds;
-        /* With LANEv2 it is possible that BUS (or a special multicast server)
-           establishes multiple Multicast Forward VCCs to us. This list
-           collects all those VCCs. LANEv1 client has only one item in this
-           list. These entries are not aged out. */
-        spinlock_t lec_arp_lock;
-        struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */
-        struct atm_vcc *lecd;
-        struct timer_list lec_arp_timer;
-        /* C10 */
-        unsigned int maximum_unknown_frame_count;
-/* Within the period of time defined by this variable, the client will send 
-   no more than C10 frames to BUS for a given unicast destination. (C11) */
-        unsigned long max_unknown_frame_time;
-/* If no traffic has been sent in this vcc for this period of time,
-   vcc will be torn down (C12)*/
-        unsigned long vcc_timeout_period;
-/* An LE Client MUST not retry an LE_ARP_REQUEST for a 
-   given frame's LAN Destination more than maximum retry count times,
-   after the first LEC_ARP_REQUEST (C13)*/
-        unsigned short max_retry_count;
-/* Max time the client will maintain an entry in its arp cache in
-   absence of a verification of that relationship (C17)*/
-        unsigned long aging_time;
-/* Max time the client will maintain an entry in cache when
-   topology change flag is true (C18) */
-        unsigned long forward_delay_time;
-/* Topology change flag  (C19)*/
-        int topology_change;
-/* Max time the client expects an LE_ARP_REQUEST/LE_ARP_RESPONSE
-   cycle to take (C20)*/
-        unsigned long arp_response_time;
-/* Time limit ot wait to receive an LE_FLUSH_RESPONSE after the
-   LE_FLUSH_REQUEST has been sent before taking recover action. (C21)*/
-        unsigned long flush_timeout;
-/* The time since sending a frame to the bus after which the
-   LE Client may assume that the frame has been either discarded or
-   delivered to the recipient (C22) */
-        unsigned long path_switching_delay;
+	struct net_device_stats stats;
+	unsigned short lecid;			/* Lecid of this client */
+	struct lec_arp_table *lec_arp_empty_ones;
+						/* Used for storing VCC's that don't have a MAC address attached yet */
+	struct lec_arp_table *lec_arp_tables[LEC_ARP_TABLE_SIZE];
+						/* Actual LE ARP table */
+	struct lec_arp_table *lec_no_forward;
+						/*
+						 * Used for storing VCC's (and forward packets from) which are to
+						 * age out by not using them to forward packets.
+						 * This is because to some LE clients there will be 2 VCCs. Only
+						 * one of them gets used.
+						 */
+	struct lec_arp_table *mcast_fwds;
+						/*
+						 * With LANEv2 it is possible that BUS (or a special multicast server)
+						 * establishes multiple Multicast Forward VCCs to us. This list
+						 * collects all those VCCs. LANEv1 client has only one item in this
+						 * list. These entries are not aged out.
+						 */
+	spinlock_t lec_arp_lock;
+	struct atm_vcc *mcast_vcc;		/* Default Multicast Send VCC */
+	struct atm_vcc *lecd;
+	struct timer_list lec_arp_timer;	/* C10 */
+	unsigned int maximum_unknown_frame_count;
+						/*
+						 * Within the period of time defined by this variable, the client will send
+						 * no more than C10 frames to BUS for a given unicast destination. (C11)
+						 */
+	unsigned long max_unknown_frame_time;
+						/*
+						 * If no traffic has been sent in this vcc for this period of time,
+						 * vcc will be torn down (C12)
+						 */
+	unsigned long vcc_timeout_period;
+						/*
+						 * An LE Client MUST not retry an LE_ARP_REQUEST for a
+						 * given frame's LAN Destination more than maximum retry count times,
+						 * after the first LEC_ARP_REQUEST (C13)
+						 */
+	unsigned short max_retry_count;
+						/*
+						 * Max time the client will maintain an entry in its arp cache in
+						 * absence of a verification of that relationship (C17)
+						 */
+	unsigned long aging_time;
+						/*
+						 * Max time the client will maintain an entry in cache when
+						 * topology change flag is true (C18)
+						 */
+	unsigned long forward_delay_time;	/* Topology change flag (C19) */
+	int topology_change;
+						/*
+						 * Max time the client expects an LE_ARP_REQUEST/LE_ARP_RESPONSE
+						 * cycle to take (C20)
+						 */
+	unsigned long arp_response_time;
+						/*
+						 * Time limit ot wait to receive an LE_FLUSH_RESPONSE after the
+						 * LE_FLUSH_REQUEST has been sent before taking recover action. (C21)
+						 */
+	unsigned long flush_timeout;
+						/* The time since sending a frame to the bus after which the
+						 * LE Client may assume that the frame has been either discarded or
+						 * delivered to the recipient (C22)
+						 */
+	unsigned long path_switching_delay;
 
-        u8 *tlvs;          /* LANE2: TLVs are new                */
-        u32 sizeoftlvs;    /* The size of the tlv array in bytes */
-        int lane_version;  /* LANE2                              */
-	int itfnum;        /* e.g. 2 for lec2, 5 for lec5        */
-        struct lane2_ops *lane2_ops; /* can be NULL for LANE v1  */
-        int is_proxy;      /* bridge between ATM and Ethernet    */
-        int is_trdev;      /* Device type, 0 = Ethernet, 1 = TokenRing */
+	u8 *tlvs;				/* LANE2: TLVs are new */
+	u32 sizeoftlvs;				/* The size of the tlv array in bytes */
+	int lane_version;			/* LANE2 */
+	int itfnum;				/* e.g. 2 for lec2, 5 for lec5 */
+	struct lane2_ops *lane2_ops;		/* can be NULL for LANE v1 */
+	int is_proxy;				/* bridge between ATM and Ethernet */
+	int is_trdev;				/* Device type, 0 = Ethernet, 1 = TokenRing */
 };
 
 struct lec_vcc_priv {
-	void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb);
+	void (*old_pop) (struct atm_vcc *vcc, struct sk_buff *skb);
 	int xoff;
 };
 
 #define LEC_VCC_PRIV(vcc)	((struct lec_vcc_priv *)((vcc)->user_back))
 
-#endif /* _LEC_H_ */
-
+#endif				/* _LEC_H_ */
diff --git a/net/atm/lec_arpc.h b/net/atm/lec_arpc.h
index 397448094648..0230ca148c77 100644
--- a/net/atm/lec_arpc.h
+++ b/net/atm/lec_arpc.h
@@ -1,92 +1,98 @@
 /*
  * Lec arp cache
- * Marko Kiiskila mkiiskila@yahoo.com
  *
+ * Marko Kiiskila <mkiiskila@yahoo.com>
  */
-#ifndef _LEC_ARP_H
-#define _LEC_ARP_H
+#ifndef _LEC_ARP_H_
+#define _LEC_ARP_H_
 #include <linux/atm.h>
 #include <linux/atmdev.h>
 #include <linux/if_ether.h>
 #include <linux/atmlec.h>
 
 struct lec_arp_table {
-        struct lec_arp_table *next;          /* Linked entry list */
-        unsigned char atm_addr[ATM_ESA_LEN]; /* Atm address */
-        unsigned char mac_addr[ETH_ALEN];    /* Mac address */
-        int is_rdesc;                        /* Mac address is a route descriptor */
-        struct atm_vcc *vcc;                 /* Vcc this entry is attached */
-        struct atm_vcc *recv_vcc;            /* Vcc we receive data from */
-        void (*old_push)(struct atm_vcc *vcc,struct sk_buff *skb); 
-                                             /* Push that leads to daemon */
-        void (*old_recv_push)(struct atm_vcc *vcc, struct sk_buff *skb);
-                                             /* Push that leads to daemon */
-        void (*old_close)(struct atm_vcc *vcc);
-                                             /* We want to see when this
-                                              * vcc gets closed */
-        unsigned long last_used;             /* For expiry */
-        unsigned long timestamp;             /* Used for various timestamping
-                                              * things:
-                                              * 1. FLUSH started 
-                                              *    (status=ESI_FLUSH_PENDING)
-                                              * 2. Counting to 
-                                              *    max_unknown_frame_time
-                                              *    (status=ESI_ARP_PENDING||
-                                              *     status=ESI_VC_PENDING)
-                                              */
-        unsigned char no_tries;              /* No of times arp retry has been 
-                                                tried */
-        unsigned char status;                /* Status of this entry */
-        unsigned short flags;                /* Flags for this entry */
-        unsigned short packets_flooded;      /* Data packets flooded */
-        unsigned long flush_tran_id;         /* Transaction id in flush protocol */
-        struct timer_list timer;             /* Arping timer */
-        struct lec_priv *priv;               /* Pointer back */
+	struct lec_arp_table *next;	/* Linked entry list */
+	unsigned char atm_addr[ATM_ESA_LEN];	/* Atm address */
+	unsigned char mac_addr[ETH_ALEN];	/* Mac address */
+	int is_rdesc;			/* Mac address is a route descriptor */
+	struct atm_vcc *vcc;		/* Vcc this entry is attached */
+	struct atm_vcc *recv_vcc;	/* Vcc we receive data from */
 
-        u8  *tlvs;             /* LANE2: Each MAC address can have TLVs    */
-        u32 sizeoftlvs;        /* associated with it. sizeoftlvs tells the */
-                               /* the length of the tlvs array             */
-        struct sk_buff_head tx_wait; /* wait queue for outgoing packets    */
+	void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb);
+					/* Push that leads to daemon */
+
+	void (*old_recv_push) (struct atm_vcc *vcc, struct sk_buff *skb);
+					/* Push that leads to daemon */
+
+	void (*old_close) (struct atm_vcc *vcc);
+					/* We want to see when this vcc gets closed */
+
+	unsigned long last_used;	/* For expiry */
+	unsigned long timestamp;	/* Used for various timestamping things:
+					 * 1. FLUSH started
+					 *    (status=ESI_FLUSH_PENDING)
+					 * 2. Counting to
+					 *    max_unknown_frame_time
+					 *    (status=ESI_ARP_PENDING||
+					 *     status=ESI_VC_PENDING)
+					 */
+	unsigned char no_tries;		/* No of times arp retry has been tried */
+	unsigned char status;		/* Status of this entry */
+	unsigned short flags;		/* Flags for this entry */
+	unsigned short packets_flooded;	/* Data packets flooded */
+	unsigned long flush_tran_id;	/* Transaction id in flush protocol */
+	struct timer_list timer;	/* Arping timer */
+	struct lec_priv *priv;		/* Pointer back */
+	u8 *tlvs;
+	u32 sizeoftlvs;			/*
+					 * LANE2: Each MAC address can have TLVs
+					 * associated with it.  sizeoftlvs tells the
+					 * the length of the tlvs array
+					 */
+	struct sk_buff_head tx_wait;	/* wait queue for outgoing packets */
 };
 
-struct tlv {                   /* LANE2: Template tlv struct for accessing */
-                               /* the tlvs in the lec_arp_table->tlvs array*/
-        u32 type;
-        u8  length;
-        u8  value[255];
+/*
+ * LANE2: Template tlv struct for accessing
+ * the tlvs in the lec_arp_table->tlvs array
+ */
+struct tlv {
+	u32 type;
+	u8 length;
+	u8 value[255];
 };
 
 /* Status fields */
-#define ESI_UNKNOWN 0       /*
-                             * Next packet sent to this mac address
-                             * causes ARP-request to be sent 
-                             */
-#define ESI_ARP_PENDING 1   /*
-                             * There is no ATM address associated with this
-                             * 48-bit address.  The LE-ARP protocol is in
-                             * progress.
-                             */
-#define ESI_VC_PENDING 2    /*
-                             * There is a valid ATM address associated with 
-                             * this 48-bit address but there is no VC set 
-                             * up to that ATM address.  The signaling 
-                             * protocol is in process.
-                             */
-#define ESI_FLUSH_PENDING 4 /*
-                             * The LEC has been notified of the FLUSH_START
-                             * status and it is assumed that the flush 
-                             * protocol is in process.
-                             */
-#define ESI_FORWARD_DIRECT 5 /*
-                              * Either the Path Switching Delay (C22) has 
-                              * elapsed or the LEC has notified the Mapping 
-                              * that the flush protocol has completed.  In 
-                              * either case, it is safe to forward packets 
-                              * to this address via the data direct VC.
-                              */
+#define ESI_UNKNOWN 0		/*
+				 * Next packet sent to this mac address
+				 * causes ARP-request to be sent
+				 */
+#define ESI_ARP_PENDING 1	/*
+				 * There is no ATM address associated with this
+				 * 48-bit address.  The LE-ARP protocol is in
+				 * progress.
+				 */
+#define ESI_VC_PENDING 2	/*
+				 * There is a valid ATM address associated with
+				 * this 48-bit address but there is no VC set
+				 * up to that ATM address.  The signaling
+				 * protocol is in process.
+				 */
+#define ESI_FLUSH_PENDING 4	/*
+				 * The LEC has been notified of the FLUSH_START
+				 * status and it is assumed that the flush
+				 * protocol is in process.
+				 */
+#define ESI_FORWARD_DIRECT 5	/*
+				 * Either the Path Switching Delay (C22) has
+				 * elapsed or the LEC has notified the Mapping
+				 * that the flush protocol has completed.  In
+				 * either case, it is safe to forward packets
+				 * to this address via the data direct VC.
+				 */
 
 /* Flag values */
 #define LEC_REMOTE_FLAG      0x0001
 #define LEC_PERMANENT_FLAG   0x0002
 
-#endif
+#endif /* _LEC_ARP_H_ */
-- 
cgit v1.2.3


From 4aff5e2333c9a1609662f2091f55c3f6fffdad36 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 10 Aug 2006 08:44:47 +0200
Subject: [PATCH] Split struct request ->flags into two parts

Right now ->flags is a bit of a mess: some are request types, and
others are just modifiers. Clean this up by splitting it into
->cmd_type and ->cmd_flags. This allows introduction of generic
Linux block message types, useful for sending generic Linux commands
to block devices.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/as-iosched.c              |   2 +-
 block/elevator.c                |  26 +++---
 block/ll_rw_blk.c               | 101 ++++++++--------------
 block/scsi_ioctl.c              |   6 +-
 drivers/block/floppy.c          |   4 +-
 drivers/block/nbd.c             |   8 +-
 drivers/block/paride/pd.c       |   2 +-
 drivers/block/pktcdvd.c         |   6 +-
 drivers/block/xd.c              |   2 +-
 drivers/cdrom/cdrom.c           |   2 +-
 drivers/cdrom/cdu31a.c          |   4 +-
 drivers/ide/ide-cd.c            |  69 +++++++--------
 drivers/ide/ide-disk.c          |   5 +-
 drivers/ide/ide-dma.c           |   2 +-
 drivers/ide/ide-floppy.c        |  14 ++--
 drivers/ide/ide-io.c            |  36 ++++----
 drivers/ide/ide-lib.c           |   5 +-
 drivers/ide/ide-tape.c          |   8 +-
 drivers/ide/ide-taskfile.c      |   8 +-
 drivers/ide/ide.c               |   4 +-
 drivers/ide/legacy/hd.c         |   2 +-
 drivers/md/dm-emc.c             |   3 +-
 drivers/message/i2o/i2o_block.c |   7 +-
 drivers/mmc/mmc_queue.c         |   6 +-
 drivers/mtd/mtd_blkdevs.c       |   2 +-
 drivers/s390/block/dasd_diag.c  |   2 +-
 drivers/s390/block/dasd_eckd.c  |   2 +-
 drivers/s390/block/dasd_fba.c   |   2 +-
 drivers/scsi/aic7xxx_old.c      |   4 +-
 drivers/scsi/ide-scsi.c         |  14 ++--
 drivers/scsi/pluto.c            |   6 +-
 drivers/scsi/scsi_lib.c         |  37 +++++----
 drivers/scsi/sd.c               |   5 +-
 drivers/scsi/sun3_NCR5380.c     |   2 +-
 drivers/scsi/sun3_scsi.c        |   2 +-
 drivers/scsi/sun3_scsi_vme.c    |   2 +-
 include/linux/blkdev.h          | 180 ++++++++++++++++++++++------------------
 include/linux/blktrace_api.h    |   2 +-
 include/scsi/scsi_tcq.h         |   2 +-
 39 files changed, 295 insertions(+), 301 deletions(-)

(limited to 'include/linux')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index 5da56d48fbd3..ad1cc4077819 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1335,7 +1335,7 @@ static void as_add_request(request_queue_t *q, struct request *rq)
 	arq->state = AS_RQ_NEW;
 
 	if (rq_data_dir(arq->request) == READ
-			|| (arq->request->flags & REQ_RW_SYNC))
+			|| (arq->request->cmd_flags & REQ_RW_SYNC))
 		arq->is_sync = 1;
 	else
 		arq->is_sync = 0;
diff --git a/block/elevator.c b/block/elevator.c
index 9b72dc7c8a5c..4ac97b642042 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -242,7 +242,7 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 	list_for_each_prev(entry, &q->queue_head) {
 		struct request *pos = list_entry_rq(entry);
 
-		if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
+		if (pos->cmd_flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
 			break;
 		if (rq->sector >= boundary) {
 			if (pos->sector < boundary)
@@ -313,7 +313,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
 			e->ops->elevator_deactivate_req_fn(q, rq);
 	}
 
-	rq->flags &= ~REQ_STARTED;
+	rq->cmd_flags &= ~REQ_STARTED;
 
 	elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
 }
@@ -344,13 +344,13 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 
 	switch (where) {
 	case ELEVATOR_INSERT_FRONT:
-		rq->flags |= REQ_SOFTBARRIER;
+		rq->cmd_flags |= REQ_SOFTBARRIER;
 
 		list_add(&rq->queuelist, &q->queue_head);
 		break;
 
 	case ELEVATOR_INSERT_BACK:
-		rq->flags |= REQ_SOFTBARRIER;
+		rq->cmd_flags |= REQ_SOFTBARRIER;
 		elv_drain_elevator(q);
 		list_add_tail(&rq->queuelist, &q->queue_head);
 		/*
@@ -369,7 +369,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 
 	case ELEVATOR_INSERT_SORT:
 		BUG_ON(!blk_fs_request(rq));
-		rq->flags |= REQ_SORTED;
+		rq->cmd_flags |= REQ_SORTED;
 		q->nr_sorted++;
 		if (q->last_merge == NULL && rq_mergeable(rq))
 			q->last_merge = rq;
@@ -387,7 +387,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 		 * insertion; otherwise, requests should be requeued
 		 * in ordseq order.
 		 */
-		rq->flags |= REQ_SOFTBARRIER;
+		rq->cmd_flags |= REQ_SOFTBARRIER;
 
 		if (q->ordseq == 0) {
 			list_add(&rq->queuelist, &q->queue_head);
@@ -429,9 +429,9 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 		       int plug)
 {
 	if (q->ordcolor)
-		rq->flags |= REQ_ORDERED_COLOR;
+		rq->cmd_flags |= REQ_ORDERED_COLOR;
 
-	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
+	if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
 		/*
 		 * toggle ordered color
 		 */
@@ -452,7 +452,7 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 			q->end_sector = rq_end_sector(rq);
 			q->boundary_rq = rq;
 		}
-	} else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
+	} else if (!(rq->cmd_flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
 		where = ELEVATOR_INSERT_BACK;
 
 	if (plug)
@@ -493,7 +493,7 @@ struct request *elv_next_request(request_queue_t *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
-		if (!(rq->flags & REQ_STARTED)) {
+		if (!(rq->cmd_flags & REQ_STARTED)) {
 			elevator_t *e = q->elevator;
 
 			/*
@@ -510,7 +510,7 @@ struct request *elv_next_request(request_queue_t *q)
 			 * it, a request that has been delayed should
 			 * not be passed by new incoming requests
 			 */
-			rq->flags |= REQ_STARTED;
+			rq->cmd_flags |= REQ_STARTED;
 			blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
 		}
 
@@ -519,7 +519,7 @@ struct request *elv_next_request(request_queue_t *q)
 			q->boundary_rq = NULL;
 		}
 
-		if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
+		if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
 			break;
 
 		ret = q->prep_rq_fn(q, rq);
@@ -541,7 +541,7 @@ struct request *elv_next_request(request_queue_t *q)
 				nr_bytes = rq->data_len;
 
 			blkdev_dequeue_request(rq);
-			rq->flags |= REQ_QUIET;
+			rq->cmd_flags |= REQ_QUIET;
 			end_that_request_chunk(rq, 0, nr_bytes);
 			end_that_request_last(rq, 0);
 		} else {
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 51dc0edf76e0..9b91bb70c5ed 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -382,8 +382,8 @@ unsigned blk_ordered_req_seq(struct request *rq)
 	if (rq == &q->post_flush_rq)
 		return QUEUE_ORDSEQ_POSTFLUSH;
 
-	if ((rq->flags & REQ_ORDERED_COLOR) ==
-	    (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
+	if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
+	    (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
 		return QUEUE_ORDSEQ_DRAIN;
 	else
 		return QUEUE_ORDSEQ_DONE;
@@ -446,8 +446,8 @@ static void queue_flush(request_queue_t *q, unsigned which)
 		end_io = post_flush_end_io;
 	}
 
+	rq->cmd_flags = REQ_HARDBARRIER;
 	rq_init(q, rq);
-	rq->flags = REQ_HARDBARRIER;
 	rq->elevator_private = NULL;
 	rq->rq_disk = q->bar_rq.rq_disk;
 	rq->rl = NULL;
@@ -471,9 +471,11 @@ static inline struct request *start_ordered(request_queue_t *q,
 	blkdev_dequeue_request(rq);
 	q->orig_bar_rq = rq;
 	rq = &q->bar_rq;
+	rq->cmd_flags = 0;
 	rq_init(q, rq);
-	rq->flags = bio_data_dir(q->orig_bar_rq->bio);
-	rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
+	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
+		rq->cmd_flags |= REQ_RW;
+	rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
 	rq->elevator_private = NULL;
 	rq->rl = NULL;
 	init_request_from_bio(rq, q->orig_bar_rq->bio);
@@ -1124,7 +1126,7 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq)
 	}
 
 	list_del_init(&rq->queuelist);
-	rq->flags &= ~REQ_QUEUED;
+	rq->cmd_flags &= ~REQ_QUEUED;
 	rq->tag = -1;
 
 	if (unlikely(bqt->tag_index[tag] == NULL))
@@ -1160,7 +1162,7 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq)
 	struct blk_queue_tag *bqt = q->queue_tags;
 	int tag;
 
-	if (unlikely((rq->flags & REQ_QUEUED))) {
+	if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
 		printk(KERN_ERR 
 		       "%s: request %p for device [%s] already tagged %d",
 		       __FUNCTION__, rq,
@@ -1174,7 +1176,7 @@ int blk_queue_start_tag(request_queue_t *q, struct request *rq)
 
 	__set_bit(tag, bqt->tag_map);
 
-	rq->flags |= REQ_QUEUED;
+	rq->cmd_flags |= REQ_QUEUED;
 	rq->tag = tag;
 	bqt->tag_index[tag] = rq;
 	blkdev_dequeue_request(rq);
@@ -1210,65 +1212,31 @@ void blk_queue_invalidate_tags(request_queue_t *q)
 			printk(KERN_ERR
 			       "%s: bad tag found on list\n", __FUNCTION__);
 			list_del_init(&rq->queuelist);
-			rq->flags &= ~REQ_QUEUED;
+			rq->cmd_flags &= ~REQ_QUEUED;
 		} else
 			blk_queue_end_tag(q, rq);
 
-		rq->flags &= ~REQ_STARTED;
+		rq->cmd_flags &= ~REQ_STARTED;
 		__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
 	}
 }
 
 EXPORT_SYMBOL(blk_queue_invalidate_tags);
 
-static const char * const rq_flags[] = {
-	"REQ_RW",
-	"REQ_FAILFAST",
-	"REQ_SORTED",
-	"REQ_SOFTBARRIER",
-	"REQ_HARDBARRIER",
-	"REQ_FUA",
-	"REQ_CMD",
-	"REQ_NOMERGE",
-	"REQ_STARTED",
-	"REQ_DONTPREP",
-	"REQ_QUEUED",
-	"REQ_ELVPRIV",
-	"REQ_PC",
-	"REQ_BLOCK_PC",
-	"REQ_SENSE",
-	"REQ_FAILED",
-	"REQ_QUIET",
-	"REQ_SPECIAL",
-	"REQ_DRIVE_CMD",
-	"REQ_DRIVE_TASK",
-	"REQ_DRIVE_TASKFILE",
-	"REQ_PREEMPT",
-	"REQ_PM_SUSPEND",
-	"REQ_PM_RESUME",
-	"REQ_PM_SHUTDOWN",
-	"REQ_ORDERED_COLOR",
-};
-
 void blk_dump_rq_flags(struct request *rq, char *msg)
 {
 	int bit;
 
-	printk("%s: dev %s: flags = ", msg,
-		rq->rq_disk ? rq->rq_disk->disk_name : "?");
-	bit = 0;
-	do {
-		if (rq->flags & (1 << bit))
-			printk("%s ", rq_flags[bit]);
-		bit++;
-	} while (bit < __REQ_NR_BITS);
+	printk("%s: dev %s: type=%x, flags=%x\n", msg,
+		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
+		rq->cmd_flags);
 
 	printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
 						       rq->nr_sectors,
 						       rq->current_nr_sectors);
 	printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
 
-	if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) {
+	if (blk_pc_request(rq)) {
 		printk("cdb: ");
 		for (bit = 0; bit < sizeof(rq->cmd); bit++)
 			printk("%02x ", rq->cmd[bit]);
@@ -1441,7 +1409,7 @@ static inline int ll_new_mergeable(request_queue_t *q,
 	int nr_phys_segs = bio_phys_segments(q, bio);
 
 	if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -1464,7 +1432,7 @@ static inline int ll_new_hw_segment(request_queue_t *q,
 
 	if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
 	    || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -1491,7 +1459,7 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req,
 		max_sectors = q->max_sectors;
 
 	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -1530,7 +1498,7 @@ static int ll_front_merge_fn(request_queue_t *q, struct request *req,
 
 
 	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -2029,7 +1997,7 @@ EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(request_queue_t *q, struct request *rq)
 {
-	if (rq->flags & REQ_ELVPRIV)
+	if (rq->cmd_flags & REQ_ELVPRIV)
 		elv_put_request(q, rq);
 	mempool_free(rq, q->rq.rq_pool);
 }
@@ -2044,17 +2012,17 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
 		return NULL;
 
 	/*
-	 * first three bits are identical in rq->flags and bio->bi_rw,
+	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
 	 * see bio.h and blkdev.h
 	 */
-	rq->flags = rw;
+	rq->cmd_flags = rw;
 
 	if (priv) {
 		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
 			mempool_free(rq, q->rq.rq_pool);
 			return NULL;
 		}
-		rq->flags |= REQ_ELVPRIV;
+		rq->cmd_flags |= REQ_ELVPRIV;
 	}
 
 	return rq;
@@ -2351,7 +2319,8 @@ void blk_insert_request(request_queue_t *q, struct request *rq,
 	 * must not attempt merges on this) and that it acts as a soft
 	 * barrier
 	 */
-	rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
+	rq->cmd_flags |= REQ_SOFTBARRIER;
 
 	rq->special = data;
 
@@ -2558,7 +2527,7 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
 	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
 
 	rq->rq_disk = bd_disk;
-	rq->flags |= REQ_NOMERGE;
+	rq->cmd_flags |= REQ_NOMERGE;
 	rq->end_io = done;
 	WARN_ON(irqs_disabled());
 	spin_lock_irq(q->queue_lock);
@@ -2728,7 +2697,7 @@ void __blk_put_request(request_queue_t *q, struct request *req)
 	 */
 	if (rl) {
 		int rw = rq_data_dir(req);
-		int priv = req->flags & REQ_ELVPRIV;
+		int priv = req->cmd_flags & REQ_ELVPRIV;
 
 		BUG_ON(!list_empty(&req->queuelist));
 
@@ -2890,22 +2859,22 @@ static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
 
 static void init_request_from_bio(struct request *req, struct bio *bio)
 {
-	req->flags |= REQ_CMD;
+	req->cmd_type = REQ_TYPE_FS;
 
 	/*
 	 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
 	 */
 	if (bio_rw_ahead(bio) || bio_failfast(bio))
-		req->flags |= REQ_FAILFAST;
+		req->cmd_flags |= REQ_FAILFAST;
 
 	/*
 	 * REQ_BARRIER implies no merging, but lets make it explicit
 	 */
 	if (unlikely(bio_barrier(bio)))
-		req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
+		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 
 	if (bio_sync(bio))
-		req->flags |= REQ_RW_SYNC;
+		req->cmd_flags |= REQ_RW_SYNC;
 
 	req->errors = 0;
 	req->hard_sector = req->sector = bio->bi_sector;
@@ -3306,7 +3275,7 @@ static int __end_that_request_first(struct request *req, int uptodate,
 		req->errors = 0;
 
 	if (!uptodate) {
-		if (blk_fs_request(req) && !(req->flags & REQ_QUIET))
+		if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))
 			printk("end_request: I/O error, dev %s, sector %llu\n",
 				req->rq_disk ? req->rq_disk->disk_name : "?",
 				(unsigned long long)req->sector);
@@ -3569,8 +3538,8 @@ EXPORT_SYMBOL(end_request);
 
 void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
 {
-	/* first two bits are identical in rq->flags and bio->bi_rw */
-	rq->flags |= (bio->bi_rw & 3);
+	/* first two bits are identical in rq->cmd_flags and bio->bi_rw */
+	rq->cmd_flags |= (bio->bi_rw & 3);
 
 	rq->nr_phys_segments = bio_phys_segments(q, bio);
 	rq->nr_hw_segments = bio_hw_segments(q, bio);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index b33eda26e205..2dc326421a24 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -294,7 +294,7 @@ static int sg_io(struct file *file, request_queue_t *q,
 	rq->sense = sense;
 	rq->sense_len = 0;
 
-	rq->flags |= REQ_BLOCK_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	bio = rq->bio;
 
 	/*
@@ -470,7 +470,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q,
 	memset(sense, 0, sizeof(sense));
 	rq->sense = sense;
 	rq->sense_len = 0;
-	rq->flags |= REQ_BLOCK_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	blk_execute_rq(q, disk, rq, 0);
 
@@ -502,7 +502,7 @@ static int __blk_send_generic(request_queue_t *q, struct gendisk *bd_disk, int c
 	int err;
 
 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
-	rq->flags |= REQ_BLOCK_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->data = NULL;
 	rq->data_len = 0;
 	rq->timeout = BLK_DEFAULT_TIMEOUT;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index ad1d7065a1b2..629c5769d994 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2991,8 +2991,8 @@ static void do_fd_request(request_queue_t * q)
 	if (usage_count == 0) {
 		printk("warning: usage count=0, current_req=%p exiting\n",
 		       current_req);
-		printk("sect=%ld flags=%lx\n", (long)current_req->sector,
-		       current_req->flags);
+		printk("sect=%ld type=%x flags=%x\n", (long)current_req->sector,
+		       current_req->cmd_type, current_req->cmd_flags);
 		return;
 	}
 	if (test_bit(0, &fdc_busy)) {
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index bdbade9a5cf5..9d1035e8d9d8 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -407,10 +407,10 @@ static void do_nbd_request(request_queue_t * q)
 		struct nbd_device *lo;
 
 		blkdev_dequeue_request(req);
-		dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%lx)\n",
-				req->rq_disk->disk_name, req, req->flags);
+		dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
+				req->rq_disk->disk_name, req, req->cmd_type);
 
-		if (!(req->flags & REQ_CMD))
+		if (!blk_fs_request(req))
 			goto error_out;
 
 		lo = req->rq_disk->private_data;
@@ -489,7 +489,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
 	switch (cmd) {
 	case NBD_DISCONNECT:
 	        printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
-		sreq.flags = REQ_SPECIAL;
+		sreq.cmd_type = REQ_TYPE_SPECIAL;
 		nbd_cmd(&sreq) = NBD_CMD_DISC;
 		/*
 		 * Set these to sane values in case server implementation
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 2403721f9db1..12ff1a274d91 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -437,7 +437,7 @@ static char *pd_buf;		/* buffer for request in progress */
 
 static enum action do_pd_io_start(void)
 {
-	if (pd_req->flags & REQ_SPECIAL) {
+	if (blk_special_request(pd_req)) {
 		phase = pd_special;
 		return pd_special();
 	}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 451b996bba91..42891d2b054e 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -365,16 +365,16 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 	rq->sense = sense;
 	memset(sense, 0, sizeof(sense));
 	rq->sense_len = 0;
-	rq->flags |= REQ_BLOCK_PC | REQ_HARDBARRIER;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	rq->cmd_flags |= REQ_HARDBARRIER;
 	if (cgc->quiet)
-		rq->flags |= REQ_QUIET;
+		rq->cmd_flags |= REQ_QUIET;
 	memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
 	if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
 		memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
 	rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
 
 	rq->ref_count++;
-	rq->flags |= REQ_NOMERGE;
 	rq->waiting = &wait;
 	rq->end_io = blk_end_sync_rq;
 	elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index e828e4cbd3e1..ebf3025721d1 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c
@@ -313,7 +313,7 @@ static void do_xd_request (request_queue_t * q)
 		int res = 0;
 		int retry;
 
-		if (!(req->flags & REQ_CMD)) {
+		if (!blk_fs_request(req)) {
 			end_request(req, 0);
 			continue;
 		}
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index d239cf8b20bd..b38c84a7a8e3 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2129,7 +2129,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 		rq->cmd[9] = 0xf8;
 
 		rq->cmd_len = 12;
-		rq->flags |= REQ_BLOCK_PC;
+		rq->cmd_type = REQ_TYPE_BLOCK_PC;
 		rq->timeout = 60 * HZ;
 		bio = rq->bio;
 
diff --git a/drivers/cdrom/cdu31a.c b/drivers/cdrom/cdu31a.c
index 37bdb0163f0d..ccd91c1a84bd 100644
--- a/drivers/cdrom/cdu31a.c
+++ b/drivers/cdrom/cdu31a.c
@@ -1338,8 +1338,10 @@ static void do_cdu31a_request(request_queue_t * q)
 		}
 
 		/* WTF??? */
-		if (!(req->flags & REQ_CMD))
+		if (!blk_fs_request(req)) {
+			end_request(req, 0);
 			continue;
+		}
 		if (rq_data_dir(req) == WRITE) {
 			end_request(req, 0);
 			continue;
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 654d4cd09847..69bbb6206a00 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -372,7 +372,7 @@ static int cdrom_log_sense(ide_drive_t *drive, struct request *rq,
 {
 	int log = 0;
 
-	if (!sense || !rq || (rq->flags & REQ_QUIET))
+	if (!sense || !rq || (rq->cmd_flags & REQ_QUIET))
 		return 0;
 
 	switch (sense->sense_key) {
@@ -597,7 +597,7 @@ static void cdrom_prepare_request(ide_drive_t *drive, struct request *rq)
 	struct cdrom_info *cd = drive->driver_data;
 
 	ide_init_drive_cmd(rq);
-	rq->flags = REQ_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->rq_disk = cd->disk;
 }
 
@@ -617,7 +617,7 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
 	rq->cmd[0] = GPCMD_REQUEST_SENSE;
 	rq->cmd[4] = rq->data_len = 18;
 
-	rq->flags = REQ_SENSE;
+	rq->cmd_type = REQ_TYPE_SENSE;
 
 	/* NOTE! Save the failed command in "rq->buffer" */
 	rq->buffer = (void *) failed_command;
@@ -630,10 +630,10 @@ static void cdrom_end_request (ide_drive_t *drive, int uptodate)
 	struct request *rq = HWGROUP(drive)->rq;
 	int nsectors = rq->hard_cur_sectors;
 
-	if ((rq->flags & REQ_SENSE) && uptodate) {
+	if (blk_sense_request(rq) && uptodate) {
 		/*
-		 * For REQ_SENSE, "rq->buffer" points to the original failed
-		 * request
+		 * For REQ_TYPE_SENSE, "rq->buffer" points to the original
+		 * failed request
 		 */
 		struct request *failed = (struct request *) rq->buffer;
 		struct cdrom_info *info = drive->driver_data;
@@ -706,17 +706,17 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
 		return 1;
 	}
 
-	if (rq->flags & REQ_SENSE) {
+	if (blk_sense_request(rq)) {
 		/* We got an error trying to get sense info
 		   from the drive (probably while trying
 		   to recover from a former error).  Just give up. */
 
-		rq->flags |= REQ_FAILED;
+		rq->cmd_flags |= REQ_FAILED;
 		cdrom_end_request(drive, 0);
 		ide_error(drive, "request sense failure", stat);
 		return 1;
 
-	} else if (rq->flags & (REQ_PC | REQ_BLOCK_PC)) {
+	} else if (blk_pc_request(rq)) {
 		/* All other functions, except for READ. */
 		unsigned long flags;
 
@@ -724,7 +724,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
 		 * if we have an error, pass back CHECK_CONDITION as the
 		 * scsi status byte
 		 */
-		if ((rq->flags & REQ_BLOCK_PC) && !rq->errors)
+		if (!rq->errors)
 			rq->errors = SAM_STAT_CHECK_CONDITION;
 
 		/* Check for tray open. */
@@ -735,12 +735,12 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret)
 			cdrom_saw_media_change (drive);
 			/*printk("%s: media changed\n",drive->name);*/
 			return 0;
-		} else if (!(rq->flags & REQ_QUIET)) {
+		} else if (!(rq->cmd_flags & REQ_QUIET)) {
 			/* Otherwise, print an error. */
 			ide_dump_status(drive, "packet command error", stat);
 		}
 		
-		rq->flags |= REQ_FAILED;
+		rq->cmd_flags |= REQ_FAILED;
 
 		/*
 		 * instead of playing games with moving completions around,
@@ -881,7 +881,7 @@ static int cdrom_timer_expiry(ide_drive_t *drive)
 			wait = ATAPI_WAIT_PC;
 			break;
 		default:
-			if (!(rq->flags & REQ_QUIET))
+			if (!(rq->cmd_flags & REQ_QUIET))
 				printk(KERN_INFO "ide-cd: cmd 0x%x timed out\n", rq->cmd[0]);
 			wait = 0;
 			break;
@@ -1124,7 +1124,7 @@ static ide_startstop_t cdrom_read_intr (ide_drive_t *drive)
 		if (rq->current_nr_sectors > 0) {
 			printk (KERN_ERR "%s: cdrom_read_intr: data underrun (%d blocks)\n",
 				drive->name, rq->current_nr_sectors);
-			rq->flags |= REQ_FAILED;
+			rq->cmd_flags |= REQ_FAILED;
 			cdrom_end_request(drive, 0);
 		} else
 			cdrom_end_request(drive, 1);
@@ -1456,7 +1456,7 @@ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive)
 			printk ("%s: cdrom_pc_intr: data underrun %d\n",
 				drive->name, pc->buflen);
 			*/
-			rq->flags |= REQ_FAILED;
+			rq->cmd_flags |= REQ_FAILED;
 			cdrom_end_request(drive, 0);
 		}
 		return ide_stopped;
@@ -1509,7 +1509,7 @@ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive)
 		rq->data += thislen;
 		rq->data_len -= thislen;
 
-		if (rq->flags & REQ_SENSE)
+		if (blk_sense_request(rq))
 			rq->sense_len += thislen;
 	} else {
 confused:
@@ -1517,7 +1517,7 @@ confused:
 			"appears confused (ireason = 0x%02x). "
 			"Trying to recover by ending request.\n",
 			drive->name, ireason);
-		rq->flags |= REQ_FAILED;
+		rq->cmd_flags |= REQ_FAILED;
 		cdrom_end_request(drive, 0);
 		return ide_stopped;
 	}
@@ -1546,7 +1546,7 @@ static ide_startstop_t cdrom_do_packet_command (ide_drive_t *drive)
 	struct cdrom_info *info = drive->driver_data;
 
 	info->dma = 0;
-	rq->flags &= ~REQ_FAILED;
+	rq->cmd_flags &= ~REQ_FAILED;
 	len = rq->data_len;
 
 	/* Start sending the command to the drive. */
@@ -1558,7 +1558,7 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
 {
 	struct request_sense sense;
 	int retries = 10;
-	unsigned int flags = rq->flags;
+	unsigned int flags = rq->cmd_flags;
 
 	if (rq->sense == NULL)
 		rq->sense = &sense;
@@ -1567,14 +1567,14 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
 	do {
 		int error;
 		unsigned long time = jiffies;
-		rq->flags = flags;
+		rq->cmd_flags = flags;
 
 		error = ide_do_drive_cmd(drive, rq, ide_wait);
 		time = jiffies - time;
 
 		/* FIXME: we should probably abort/retry or something 
 		 * in case of failure */
-		if (rq->flags & REQ_FAILED) {
+		if (rq->cmd_flags & REQ_FAILED) {
 			/* The request failed.  Retry if it was due to a unit
 			   attention status
 			   (usually means media was changed). */
@@ -1596,10 +1596,10 @@ static int cdrom_queue_packet_command(ide_drive_t *drive, struct request *rq)
 		}
 
 		/* End of retry loop. */
-	} while ((rq->flags & REQ_FAILED) && retries >= 0);
+	} while ((rq->cmd_flags & REQ_FAILED) && retries >= 0);
 
 	/* Return an error if the command failed. */
-	return (rq->flags & REQ_FAILED) ? -EIO : 0;
+	return (rq->cmd_flags & REQ_FAILED) ? -EIO : 0;
 }
 
 /*
@@ -1963,7 +1963,7 @@ static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
 {
 	struct cdrom_info *info = drive->driver_data;
 
-	rq->flags |= REQ_QUIET;
+	rq->cmd_flags |= REQ_QUIET;
 
 	info->dma = 0;
 
@@ -2023,11 +2023,11 @@ ide_do_rw_cdrom (ide_drive_t *drive, struct request *rq, sector_t block)
 		}
 		info->last_block = block;
 		return action;
-	} else if (rq->flags & (REQ_PC | REQ_SENSE)) {
+	} else if (rq->cmd_type == REQ_TYPE_SENSE) {
 		return cdrom_do_packet_command(drive);
-	} else if (rq->flags & REQ_BLOCK_PC) {
+	} else if (blk_pc_request(rq)) {
 		return cdrom_do_block_pc(drive, rq);
-	} else if (rq->flags & REQ_SPECIAL) {
+	} else if (blk_special_request(rq)) {
 		/*
 		 * right now this can only be a reset...
 		 */
@@ -2105,7 +2105,7 @@ static int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense)
 
 	req.sense = sense;
 	req.cmd[0] = GPCMD_TEST_UNIT_READY;
-	req.flags |= REQ_QUIET;
+	req.cmd_flags |= REQ_QUIET;
 
 #if ! STANDARD_ATAPI
         /* the Sanyo 3 CD changer uses byte 7 of TEST_UNIT_READY to 
@@ -2207,7 +2207,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
 	req.cmd[0] = GPCMD_READ_CDVD_CAPACITY;
 	req.data = (char *)&capbuf;
 	req.data_len = sizeof(capbuf);
-	req.flags |= REQ_QUIET;
+	req.cmd_flags |= REQ_QUIET;
 
 	stat = cdrom_queue_packet_command(drive, &req);
 	if (stat == 0) {
@@ -2230,7 +2230,7 @@ static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag,
 	req.sense = sense;
 	req.data =  buf;
 	req.data_len = buflen;
-	req.flags |= REQ_QUIET;
+	req.cmd_flags |= REQ_QUIET;
 	req.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
 	req.cmd[6] = trackno;
 	req.cmd[7] = (buflen >> 8);
@@ -2531,7 +2531,7 @@ static int ide_cdrom_packet(struct cdrom_device_info *cdi,
 	req.timeout = cgc->timeout;
 
 	if (cgc->quiet)
-		req.flags |= REQ_QUIET;
+		req.cmd_flags |= REQ_QUIET;
 
 	req.sense = cgc->sense;
 	cgc->stat = cdrom_queue_packet_command(drive, &req);
@@ -2629,7 +2629,8 @@ int ide_cdrom_reset (struct cdrom_device_info *cdi)
 	int ret;
 
 	cdrom_prepare_request(drive, &req);
-	req.flags = REQ_SPECIAL | REQ_QUIET;
+	req.cmd_type = REQ_TYPE_SPECIAL;
+	req.cmd_flags = REQ_QUIET;
 	ret = ide_do_drive_cmd(drive, &req, ide_wait);
 
 	/*
@@ -3116,9 +3117,9 @@ static int ide_cdrom_prep_pc(struct request *rq)
 
 static int ide_cdrom_prep_fn(request_queue_t *q, struct request *rq)
 {
-	if (rq->flags & REQ_CMD)
+	if (blk_fs_request(rq))
 		return ide_cdrom_prep_fs(q, rq);
-	else if (rq->flags & REQ_BLOCK_PC)
+	else if (blk_pc_request(rq))
 		return ide_cdrom_prep_pc(rq);
 
 	return 0;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 7cf3eb023521..0a05a377d66a 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -699,7 +699,8 @@ static void idedisk_prepare_flush(request_queue_t *q, struct request *rq)
 		rq->cmd[0] = WIN_FLUSH_CACHE;
 
 
-	rq->flags |= REQ_DRIVE_TASK;
+	rq->cmd_type = REQ_TYPE_ATA_TASK;
+	rq->cmd_flags |= REQ_SOFTBARRIER;
 	rq->buffer = rq->cmd;
 }
 
@@ -740,7 +741,7 @@ static int set_multcount(ide_drive_t *drive, int arg)
 	if (drive->special.b.set_multmode)
 		return -EBUSY;
 	ide_init_drive_cmd (&rq);
-	rq.flags = REQ_DRIVE_CMD;
+	rq.cmd_type = REQ_TYPE_ATA_CMD;
 	drive->mult_req = arg;
 	drive->special.b.set_multmode = 1;
 	(void) ide_do_drive_cmd (drive, &rq, ide_wait);
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 7c3a13e1cf64..c3546fe9af63 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -205,7 +205,7 @@ int ide_build_sglist(ide_drive_t *drive, struct request *rq)
 	ide_hwif_t *hwif = HWIF(drive);
 	struct scatterlist *sg = hwif->sg_table;
 
-	BUG_ON((rq->flags & REQ_DRIVE_TASKFILE) && rq->nr_sectors > 256);
+	BUG_ON((rq->cmd_type == REQ_TYPE_ATA_TASKFILE) && rq->nr_sectors > 256);
 
 	ide_map_sg(drive, rq);
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index adbe9f76a505..0edc32204915 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -588,7 +588,7 @@ static int idefloppy_do_end_request(ide_drive_t *drive, int uptodate, int nsecs)
 	/* Why does this happen? */
 	if (!rq)
 		return 0;
-	if (!(rq->flags & REQ_SPECIAL)) { //if (!IDEFLOPPY_RQ_CMD (rq->cmd)) {
+	if (!blk_special_request(rq)) {
 		/* our real local end request function */
 		ide_end_request(drive, uptodate, nsecs);
 		return 0;
@@ -689,7 +689,7 @@ static void idefloppy_queue_pc_head (ide_drive_t *drive,idefloppy_pc_t *pc,struc
 
 	ide_init_drive_cmd(rq);
 	rq->buffer = (char *) pc;
-	rq->flags = REQ_SPECIAL;	//rq->cmd = IDEFLOPPY_PC_RQ;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->rq_disk = floppy->disk;
 	(void) ide_do_drive_cmd(drive, rq, ide_preempt);
 }
@@ -1250,7 +1250,7 @@ static void idefloppy_create_rw_cmd (idefloppy_floppy_t *floppy, idefloppy_pc_t
 	pc->callback = &idefloppy_rw_callback;
 	pc->rq = rq;
 	pc->b_count = cmd == READ ? 0 : rq->bio->bi_size;
-	if (rq->flags & REQ_RW)
+	if (rq->cmd_flags & REQ_RW)
 		set_bit(PC_WRITING, &pc->flags);
 	pc->buffer = NULL;
 	pc->request_transfer = pc->buffer_size = blocks * floppy->block_size;
@@ -1303,7 +1303,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
 		idefloppy_do_end_request(drive, 0, 0);
 		return ide_stopped;
 	}
-	if (rq->flags & REQ_CMD) {
+	if (blk_fs_request(rq)) {
 		if (((long)rq->sector % floppy->bs_factor) ||
 		    (rq->nr_sectors % floppy->bs_factor)) {
 			printk("%s: unsupported r/w request size\n",
@@ -1313,9 +1313,9 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
 		}
 		pc = idefloppy_next_pc_storage(drive);
 		idefloppy_create_rw_cmd(floppy, pc, rq, block);
-	} else if (rq->flags & REQ_SPECIAL) {
+	} else if (blk_special_request(rq)) {
 		pc = (idefloppy_pc_t *) rq->buffer;
-	} else if (rq->flags & REQ_BLOCK_PC) {
+	} else if (blk_pc_request(rq)) {
 		pc = idefloppy_next_pc_storage(drive);
 		if (idefloppy_blockpc_cmd(floppy, pc, rq)) {
 			idefloppy_do_end_request(drive, 0, 0);
@@ -1343,7 +1343,7 @@ static int idefloppy_queue_pc_tail (ide_drive_t *drive,idefloppy_pc_t *pc)
 
 	ide_init_drive_cmd (&rq);
 	rq.buffer = (char *) pc;
-	rq.flags = REQ_SPECIAL;		//	rq.cmd = IDEFLOPPY_PC_RQ;
+	rq.cmd_type = REQ_TYPE_SPECIAL;
 	rq.rq_disk = floppy->disk;
 
 	return ide_do_drive_cmd(drive, &rq, ide_wait);
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index fb6795236e76..3436b1f104eb 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -59,7 +59,7 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq,
 {
 	int ret = 1;
 
-	BUG_ON(!(rq->flags & REQ_STARTED));
+	BUG_ON(!blk_rq_started(rq));
 
 	/*
 	 * if failfast is set on a request, override number of sectors and
@@ -244,7 +244,7 @@ int ide_end_dequeued_request(ide_drive_t *drive, struct request *rq,
 
 	spin_lock_irqsave(&ide_lock, flags);
 
-	BUG_ON(!(rq->flags & REQ_STARTED));
+	BUG_ON(!blk_rq_started(rq));
 
 	/*
 	 * if failfast is set on a request, override number of sectors and
@@ -366,7 +366,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 	rq = HWGROUP(drive)->rq;
 	spin_unlock_irqrestore(&ide_lock, flags);
 
-	if (rq->flags & REQ_DRIVE_CMD) {
+	if (rq->cmd_type == REQ_TYPE_ATA_CMD) {
 		u8 *args = (u8 *) rq->buffer;
 		if (rq->errors == 0)
 			rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -376,7 +376,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 			args[1] = err;
 			args[2] = hwif->INB(IDE_NSECTOR_REG);
 		}
-	} else if (rq->flags & REQ_DRIVE_TASK) {
+	} else if (rq->cmd_type == REQ_TYPE_ATA_TASK) {
 		u8 *args = (u8 *) rq->buffer;
 		if (rq->errors == 0)
 			rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -390,7 +390,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 			args[5] = hwif->INB(IDE_HCYL_REG);
 			args[6] = hwif->INB(IDE_SELECT_REG);
 		}
-	} else if (rq->flags & REQ_DRIVE_TASKFILE) {
+	} else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *args = (ide_task_t *) rq->special;
 		if (rq->errors == 0)
 			rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -587,7 +587,7 @@ ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, u8 stat)
 		return ide_stopped;
 
 	/* retry only "normal" I/O: */
-	if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) {
+	if (!blk_fs_request(rq)) {
 		rq->errors = 1;
 		ide_end_drive_cmd(drive, stat, err);
 		return ide_stopped;
@@ -638,7 +638,7 @@ ide_startstop_t ide_abort(ide_drive_t *drive, const char *msg)
 		return ide_stopped;
 
 	/* retry only "normal" I/O: */
-	if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) {
+	if (!blk_fs_request(rq)) {
 		rq->errors = 1;
 		ide_end_drive_cmd(drive, BUSY_STAT, 0);
 		return ide_stopped;
@@ -808,7 +808,7 @@ void ide_map_sg(ide_drive_t *drive, struct request *rq)
 	if (hwif->sg_mapped)	/* needed by ide-scsi */
 		return;
 
-	if ((rq->flags & REQ_DRIVE_TASKFILE) == 0) {
+	if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) {
 		hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
 	} else {
 		sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE);
@@ -844,7 +844,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
 		struct request *rq)
 {
 	ide_hwif_t *hwif = HWIF(drive);
-	if (rq->flags & REQ_DRIVE_TASKFILE) {
+	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
  		ide_task_t *args = rq->special;
  
 		if (!args)
@@ -866,7 +866,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
 		if (args->tf_out_flags.all != 0) 
 			return flagged_taskfile(drive, args);
 		return do_rw_taskfile(drive, args);
-	} else if (rq->flags & REQ_DRIVE_TASK) {
+	} else if (rq->cmd_type == REQ_TYPE_ATA_TASK) {
 		u8 *args = rq->buffer;
 		u8 sel;
  
@@ -892,7 +892,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
  		hwif->OUTB(sel, IDE_SELECT_REG);
  		ide_cmd(drive, args[0], args[2], &drive_cmd_intr);
  		return ide_started;
- 	} else if (rq->flags & REQ_DRIVE_CMD) {
+ 	} else if (rq->cmd_type == REQ_TYPE_ATA_CMD) {
  		u8 *args = rq->buffer;
 
 		if (!args)
@@ -980,7 +980,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 	ide_startstop_t startstop;
 	sector_t block;
 
-	BUG_ON(!(rq->flags & REQ_STARTED));
+	BUG_ON(!blk_rq_started(rq));
 
 #ifdef DEBUG
 	printk("%s: start_request: current=0x%08lx\n",
@@ -1013,9 +1013,9 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 	if (!drive->special.all) {
 		ide_driver_t *drv;
 
-		if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK))
-			return execute_drive_cmd(drive, rq);
-		else if (rq->flags & REQ_DRIVE_TASKFILE)
+		if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
+		    rq->cmd_type == REQ_TYPE_ATA_TASK ||
+		    rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
 			return execute_drive_cmd(drive, rq);
 		else if (blk_pm_request(rq)) {
 			struct request_pm_state *pm = rq->end_io_data;
@@ -1264,7 +1264,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 		 * We count how many times we loop here to make sure we service
 		 * all drives in the hwgroup without looping for ever
 		 */
-		if (drive->blocked && !blk_pm_request(rq) && !(rq->flags & REQ_PREEMPT)) {
+		if (drive->blocked && !blk_pm_request(rq) && !(rq->cmd_flags & REQ_PREEMPT)) {
 			drive = drive->next ? drive->next : hwgroup->drive;
 			if (loops++ < 4 && !blk_queue_plugged(drive->queue))
 				goto again;
@@ -1670,7 +1670,7 @@ irqreturn_t ide_intr (int irq, void *dev_id, struct pt_regs *regs)
 void ide_init_drive_cmd (struct request *rq)
 {
 	memset(rq, 0, sizeof(*rq));
-	rq->flags = REQ_DRIVE_CMD;
+	rq->cmd_type = REQ_TYPE_ATA_CMD;
 	rq->ref_count = 1;
 }
 
@@ -1727,7 +1727,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
 		hwgroup->rq = NULL;
 	if (action == ide_preempt || action == ide_head_wait) {
 		where = ELEVATOR_INSERT_FRONT;
-		rq->flags |= REQ_PREEMPT;
+		rq->cmd_flags |= REQ_PREEMPT;
 	}
 	__elv_add_request(drive->queue, rq, where, 0);
 	ide_do_request(hwgroup, IDE_NO_IRQ);
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index 1feff23487d4..850ef63cc986 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -456,13 +456,14 @@ static void ide_dump_opcode(ide_drive_t *drive)
 	spin_unlock(&ide_lock);
 	if (!rq)
 		return;
-	if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) {
+	if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
+	    rq->cmd_type == REQ_TYPE_ATA_TASK) {
 		char *args = rq->buffer;
 		if (args) {
 			opcode = args[0];
 			found = 1;
 		}
-	} else if (rq->flags & REQ_DRIVE_TASKFILE) {
+	} else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *args = rq->special;
 		if (args) {
 			task_struct_t *tf = (task_struct_t *) args->tfRegister;
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 7067ab997927..643e4b9ac651 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -1776,7 +1776,7 @@ static void idetape_create_request_sense_cmd (idetape_pc_t *pc)
 static void idetape_init_rq(struct request *rq, u8 cmd)
 {
 	memset(rq, 0, sizeof(*rq));
-	rq->flags = REQ_SPECIAL;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd[0] = cmd;
 }
 
@@ -2433,12 +2433,12 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 			rq->sector, rq->nr_sectors, rq->current_nr_sectors);
 #endif /* IDETAPE_DEBUG_LOG */
 
-	if ((rq->flags & REQ_SPECIAL) == 0) {
+	if (!blk_special_request(rq)) {
 		/*
 		 * We do not support buffer cache originated requests.
 		 */
 		printk(KERN_NOTICE "ide-tape: %s: Unsupported request in "
-			"request queue (%ld)\n", drive->name, rq->flags);
+			"request queue (%d)\n", drive->name, rq->cmd_type);
 		ide_end_request(drive, 0, 0);
 		return ide_stopped;
 	}
@@ -2768,7 +2768,7 @@ static void idetape_wait_for_request (ide_drive_t *drive, struct request *rq)
 	idetape_tape_t *tape = drive->driver_data;
 
 #if IDETAPE_DEBUG_BUGS
-	if (rq == NULL || (rq->flags & REQ_SPECIAL) == 0) {
+	if (rq == NULL || !blk_special_request(rq)) {
 		printk (KERN_ERR "ide-tape: bug: Trying to sleep on non-valid request\n");
 		return;
 	}
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 97a9244312fc..1d0470c1f957 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -363,7 +363,7 @@ static ide_startstop_t task_error(ide_drive_t *drive, struct request *rq,
 
 static void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat)
 {
-	if (rq->flags & REQ_DRIVE_TASKFILE) {
+	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *task = rq->special;
 
 		if (task->tf_out_flags.all) {
@@ -474,7 +474,7 @@ static int ide_diag_taskfile(ide_drive_t *drive, ide_task_t *args, unsigned long
 	struct request rq;
 
 	memset(&rq, 0, sizeof(rq));
-	rq.flags = REQ_DRIVE_TASKFILE;
+	rq.cmd_type = REQ_TYPE_ATA_TASKFILE;
 	rq.buffer = buf;
 
 	/*
@@ -499,7 +499,7 @@ static int ide_diag_taskfile(ide_drive_t *drive, ide_task_t *args, unsigned long
 		rq.hard_cur_sectors = rq.current_nr_sectors = rq.nr_sectors;
 
 		if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE)
-			rq.flags |= REQ_RW;
+			rq.cmd_flags |= REQ_RW;
 	}
 
 	rq.special = args;
@@ -737,7 +737,7 @@ static int ide_wait_cmd_task(ide_drive_t *drive, u8 *buf)
 	struct request rq;
 
 	ide_init_drive_cmd(&rq);
-	rq.flags = REQ_DRIVE_TASK;
+	rq.cmd_type = REQ_TYPE_ATA_TASK;
 	rq.buffer = buf;
 	return ide_do_drive_cmd(drive, &rq, ide_wait);
 }
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 9c8468de1a75..9384a3fdde6c 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -1217,7 +1217,7 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 	memset(&rq, 0, sizeof(rq));
 	memset(&rqpm, 0, sizeof(rqpm));
 	memset(&args, 0, sizeof(args));
-	rq.flags = REQ_PM_SUSPEND;
+	rq.cmd_type = REQ_TYPE_PM_SUSPEND;
 	rq.special = &args;
 	rq.end_io_data = &rqpm;
 	rqpm.pm_step = ide_pm_state_start_suspend;
@@ -1238,7 +1238,7 @@ static int generic_ide_resume(struct device *dev)
 	memset(&rq, 0, sizeof(rq));
 	memset(&rqpm, 0, sizeof(rqpm));
 	memset(&args, 0, sizeof(args));
-	rq.flags = REQ_PM_RESUME;
+	rq.cmd_type = REQ_TYPE_PM_RESUME;
 	rq.special = &args;
 	rq.end_io_data = &rqpm;
 	rqpm.pm_step = ide_pm_state_start_resume;
diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c
index aebecd8f51cc..4ab931145673 100644
--- a/drivers/ide/legacy/hd.c
+++ b/drivers/ide/legacy/hd.c
@@ -626,7 +626,7 @@ repeat:
 		req->rq_disk->disk_name, (req->cmd == READ)?"read":"writ",
 		cyl, head, sec, nsect, req->buffer);
 #endif
-	if (req->flags & REQ_CMD) {
+	if (blk_fs_request(req)) {
 		switch (rq_data_dir(req)) {
 		case READ:
 			hd_out(disk,nsect,sec,head,cyl,WIN_READ,&read_intr);
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
index 2a374ccb30dd..2b2d45d7baaa 100644
--- a/drivers/md/dm-emc.c
+++ b/drivers/md/dm-emc.c
@@ -126,7 +126,8 @@ static struct request *get_failover_req(struct emc_handler *h,
 	memset(&rq->cmd, 0, BLK_MAX_CDB);
 
 	rq->timeout = EMC_FAILOVER_TIMEOUT;
-	rq->flags |= (REQ_BLOCK_PC | REQ_FAILFAST | REQ_NOMERGE);
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
 
 	return rq;
 }
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 1ddc2fb429d5..eaba81bf2eca 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -390,9 +390,9 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req)
 	}
 
 	/* request is already processed by us, so return */
-	if (req->flags & REQ_SPECIAL) {
+	if (blk_special_request(req)) {
 		osm_debug("REQ_SPECIAL already set!\n");
-		req->flags |= REQ_DONTPREP;
+		req->cmd_flags |= REQ_DONTPREP;
 		return BLKPREP_OK;
 	}
 
@@ -411,7 +411,8 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req)
 		ireq = req->special;
 
 	/* do not come back here */
-	req->flags |= REQ_DONTPREP | REQ_SPECIAL;
+	req->cmd_type = REQ_TYPE_SPECIAL;
+	req->cmd_flags |= REQ_DONTPREP;
 
 	return BLKPREP_OK;
 };
diff --git a/drivers/mmc/mmc_queue.c b/drivers/mmc/mmc_queue.c
index 74f8cdeeff0f..4ccdd82b680f 100644
--- a/drivers/mmc/mmc_queue.c
+++ b/drivers/mmc/mmc_queue.c
@@ -28,7 +28,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
 	struct mmc_queue *mq = q->queuedata;
 	int ret = BLKPREP_KILL;
 
-	if (req->flags & REQ_SPECIAL) {
+	if (blk_special_request(req)) {
 		/*
 		 * Special commands already have the command
 		 * blocks already setup in req->special.
@@ -36,7 +36,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
 		BUG_ON(!req->special);
 
 		ret = BLKPREP_OK;
-	} else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
+	} else if (blk_fs_request(req) || blk_pc_request(req)) {
 		/*
 		 * Block I/O requests need translating according
 		 * to the protocol.
@@ -50,7 +50,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
 	}
 
 	if (ret == BLKPREP_OK)
-		req->flags |= REQ_DONTPREP;
+		req->cmd_flags |= REQ_DONTPREP;
 
 	return ret;
 }
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 458d3c8ae1ee..6baf5fe14230 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -46,7 +46,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	nsect = req->current_nr_sectors;
 	buf = req->buffer;
 
-	if (!(req->flags & REQ_CMD))
+	if (!blk_fs_request(req))
 		return 0;
 
 	if (block + nsect > get_capacity(req->rq_disk))
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 9d051e5687ea..222a8a71a5e8 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -529,7 +529,7 @@ dasd_diag_build_cp(struct dasd_device * device, struct request *req)
 	}
 	cqr->retries = DIAG_MAX_RETRIES;
 	cqr->buildclk = get_clock();
-	if (req->flags & REQ_FAILFAST)
+	if (req->cmd_flags & REQ_FAILFAST)
 		set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
 	cqr->device = device;
 	cqr->expires = DIAG_TIMEOUT;
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index b7a7fac3f7c3..5ecea3e4fdef 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1266,7 +1266,7 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req)
 			recid++;
 		}
 	}
-	if (req->flags & REQ_FAILFAST)
+	if (req->cmd_flags & REQ_FAILFAST)
 		set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
 	cqr->device = device;
 	cqr->expires = 5 * 60 * HZ;	/* 5 minutes */
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index e85015be109b..80926c548228 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -344,7 +344,7 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req)
 			recid++;
 		}
 	}
-	if (req->flags & REQ_FAILFAST)
+	if (req->cmd_flags & REQ_FAILFAST)
 		set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
 	cqr->device = device;
 	cqr->expires = 5 * 60 * HZ;	/* 5 minutes */
diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c
index 5dcef48d414f..10353379a074 100644
--- a/drivers/scsi/aic7xxx_old.c
+++ b/drivers/scsi/aic7xxx_old.c
@@ -2862,7 +2862,7 @@ aic7xxx_done(struct aic7xxx_host *p, struct aic7xxx_scb *scb)
       aic_dev->r_total++;
       ptr = aic_dev->r_bins;
     }
-    if(cmd->device->simple_tags && cmd->request->flags & REQ_HARDBARRIER)
+    if(cmd->device->simple_tags && cmd->request->cmd_flags & REQ_HARDBARRIER)
     {
       aic_dev->barrier_total++;
       if(scb->tag_action == MSG_ORDERED_Q_TAG)
@@ -10158,7 +10158,7 @@ aic7xxx_buildscb(struct aic7xxx_host *p, Scsi_Cmnd *cmd,
     /* We always force TEST_UNIT_READY to untagged */
     if (cmd->cmnd[0] != TEST_UNIT_READY && sdptr->simple_tags)
     {
-      if (req->flags & REQ_HARDBARRIER)
+      if (req->cmd_flags & REQ_HARDBARRIER)
       {
 	if(sdptr->ordered_tags)
 	{
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 94d1de55607f..65b19695ebe2 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -344,7 +344,7 @@ static int idescsi_check_condition(ide_drive_t *drive, struct request *failed_co
 	pc->buffer = buf;
 	pc->c[0] = REQUEST_SENSE;
 	pc->c[4] = pc->request_transfer = pc->buffer_size = SCSI_SENSE_BUFFERSIZE;
-	rq->flags = REQ_SENSE;
+	rq->cmd_type = REQ_TYPE_SENSE;
 	pc->timeout = jiffies + WAIT_READY;
 	/* NOTE! Save the failed packet command in "rq->buffer" */
 	rq->buffer = (void *) failed_command->special;
@@ -398,12 +398,12 @@ static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
 	int errors = rq->errors;
 	unsigned long flags;
 
-	if (!(rq->flags & (REQ_SPECIAL|REQ_SENSE))) {
+	if (!blk_special_request(rq) && !blk_sense_request(rq)) {
 		ide_end_request(drive, uptodate, nrsecs);
 		return 0;
 	}
 	ide_end_drive_cmd (drive, 0, 0);
-	if (rq->flags & REQ_SENSE) {
+	if (blk_sense_request(rq)) {
 		idescsi_pc_t *opc = (idescsi_pc_t *) rq->buffer;
 		if (log) {
 			printk ("ide-scsi: %s: wrap up check %lu, rst = ", drive->name, opc->scsi_cmd->serial_number);
@@ -712,7 +712,7 @@ static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *r
 	printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors);
 #endif /* IDESCSI_DEBUG_LOG */
 
-	if (rq->flags & (REQ_SPECIAL|REQ_SENSE)) {
+	if (blk_sense_request(rq) || blk_special_request(rq)) {
 		return idescsi_issue_pc (drive, (idescsi_pc_t *) rq->special);
 	}
 	blk_dump_rq_flags(rq, "ide-scsi: unsup command");
@@ -938,7 +938,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
 
 	ide_init_drive_cmd (rq);
 	rq->special = (char *) pc;
-	rq->flags = REQ_SPECIAL;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
 	spin_unlock_irq(host->host_lock);
 	rq->rq_disk = scsi->disk;
 	(void) ide_do_drive_cmd (drive, rq, ide_end);
@@ -992,7 +992,7 @@ static int idescsi_eh_abort (struct scsi_cmnd *cmd)
 		 */
 		printk (KERN_ERR "ide-scsi: cmd aborted!\n");
 
-		if (scsi->pc->rq->flags & REQ_SENSE)
+		if (blk_sense_request(scsi->pc->rq))
 			kfree(scsi->pc->buffer);
 		kfree(scsi->pc->rq);
 		kfree(scsi->pc);
@@ -1042,7 +1042,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd)
 	/* kill current request */
 	blkdev_dequeue_request(req);
 	end_that_request_last(req, 0);
-	if (req->flags & REQ_SENSE)
+	if (blk_sense_request(req))
 		kfree(scsi->pc->buffer);
 	kfree(scsi->pc);
 	scsi->pc = NULL;
diff --git a/drivers/scsi/pluto.c b/drivers/scsi/pluto.c
index 0bd9c60e6455..aa60a5f1fbc3 100644
--- a/drivers/scsi/pluto.c
+++ b/drivers/scsi/pluto.c
@@ -67,7 +67,6 @@ static void __init pluto_detect_done(Scsi_Cmnd *SCpnt)
 
 static void __init pluto_detect_scsi_done(Scsi_Cmnd *SCpnt)
 {
-	SCpnt->request->rq_status = RQ_SCSI_DONE;
 	PLND(("Detect done %08lx\n", (long)SCpnt))
 	if (atomic_dec_and_test (&fcss))
 		up(&fc_sem);
@@ -166,7 +165,7 @@ int __init pluto_detect(struct scsi_host_template *tpnt)
 		
 		SCpnt->cmd_len = COMMAND_SIZE(INQUIRY);
 	
-		SCpnt->request->rq_status = RQ_SCSI_BUSY;
+		SCpnt->request->cmd_flags &= ~REQ_STARTED;
 		
 		SCpnt->done = pluto_detect_done;
 		SCpnt->request_bufflen = 256;
@@ -178,7 +177,8 @@ int __init pluto_detect(struct scsi_host_template *tpnt)
 	for (retry = 0; retry < 5; retry++) {
 		for (i = 0; i < fcscount; i++) {
 			if (!fcs[i].fc) break;
-			if (fcs[i].cmd.request->rq_status != RQ_SCSI_DONE) {
+			if (!(fcs[i].cmd.request->cmd_flags & REQ_STARTED)) {
+				fcs[i].cmd.request->cmd_flags |= REQ_STARTED;
 				disable_irq(fcs[i].fc->irq);
 				PLND(("queuecommand %d %d\n", retry, i))
 				fcp_scsi_queuecommand (&(fcs[i].cmd), 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d6743b959a72..71084728eb42 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -82,7 +82,7 @@ static void scsi_unprep_request(struct request *req)
 {
 	struct scsi_cmnd *cmd = req->special;
 
-	req->flags &= ~REQ_DONTPREP;
+	req->cmd_flags &= ~REQ_DONTPREP;
 	req->special = NULL;
 
 	scsi_put_command(cmd);
@@ -196,7 +196,8 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	req->sense_len = 0;
 	req->retries = retries;
 	req->timeout = timeout;
-	req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET;
+	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT;
 
 	/*
 	 * head injection *required* here otherwise quiesce won't work
@@ -397,7 +398,8 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
 	req = blk_get_request(sdev->request_queue, write, gfp);
 	if (!req)
 		goto free_sense;
-	req->flags |= REQ_BLOCK_PC | REQ_QUIET;
+	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	req->cmd_flags |= REQ_QUIET;
 
 	if (use_sg)
 		err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp);
@@ -933,7 +935,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 					break;
 				}
 			}
-			if (!(req->flags & REQ_QUIET)) {
+			if (!(req->cmd_flags & REQ_QUIET)) {
 				scmd_printk(KERN_INFO, cmd,
 					    "Device not ready: ");
 				scsi_print_sense_hdr("", &sshdr);
@@ -941,7 +943,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 			scsi_end_request(cmd, 0, this_count, 1);
 			return;
 		case VOLUME_OVERFLOW:
-			if (!(req->flags & REQ_QUIET)) {
+			if (!(req->cmd_flags & REQ_QUIET)) {
 				scmd_printk(KERN_INFO, cmd,
 					    "Volume overflow, CDB: ");
 				__scsi_print_command(cmd->cmnd);
@@ -963,7 +965,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 		return;
 	}
 	if (result) {
-		if (!(req->flags & REQ_QUIET)) {
+		if (!(req->cmd_flags & REQ_QUIET)) {
 			scmd_printk(KERN_INFO, cmd,
 				    "SCSI error: return code = 0x%08x\n",
 				    result);
@@ -995,7 +997,7 @@ static int scsi_init_io(struct scsi_cmnd *cmd)
 	/*
 	 * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
 	 */
-	if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
+	if (blk_pc_request(req) && !req->bio) {
 		cmd->request_bufflen = req->data_len;
 		cmd->request_buffer = req->data;
 		req->buffer = req->data;
@@ -1139,13 +1141,12 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 	 * these two cases differently.  We differentiate by looking
 	 * at request->cmd, as this tells us the real story.
 	 */
-	if (req->flags & REQ_SPECIAL && req->special) {
+	if (blk_special_request(req) && req->special)
 		cmd = req->special;
-	} else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
-
-		if(unlikely(specials_only) && !(req->flags & REQ_SPECIAL)) {
-			if(specials_only == SDEV_QUIESCE ||
-					specials_only == SDEV_BLOCK)
+	else if (blk_pc_request(req) || blk_fs_request(req)) {
+		if (unlikely(specials_only) && !(req->cmd_flags & REQ_PREEMPT)){
+			if (specials_only == SDEV_QUIESCE ||
+			    specials_only == SDEV_BLOCK)
 				goto defer;
 			
 			sdev_printk(KERN_ERR, sdev,
@@ -1153,7 +1154,6 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 			goto kill;
 		}
 			
-			
 		/*
 		 * Now try and find a command block that we can use.
 		 */
@@ -1184,7 +1184,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 	 * lock.  We hope REQ_STARTED prevents anything untoward from
 	 * happening now.
 	 */
-	if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
+	if (blk_fs_request(req) || blk_pc_request(req)) {
 		int ret;
 
 		/*
@@ -1216,7 +1216,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 		/*
 		 * Initialize the actual SCSI command for this request.
 		 */
-		if (req->flags & REQ_BLOCK_PC) {
+		if (blk_pc_request(req)) {
 			scsi_setup_blk_pc_cmnd(cmd);
 		} else if (req->rq_disk) {
 			struct scsi_driver *drv;
@@ -1233,7 +1233,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 	/*
 	 * The request is now prepped, no need to come back here
 	 */
-	req->flags |= REQ_DONTPREP;
+	req->cmd_flags |= REQ_DONTPREP;
 	return BLKPREP_OK;
 
  defer:
@@ -1454,8 +1454,9 @@ static void scsi_request_fn(struct request_queue *q)
 		if (unlikely(cmd == NULL)) {
 			printk(KERN_CRIT "impossible request in %s.\n"
 					 "please mail a stack trace to "
-					 "linux-scsi@vger.kernel.org",
+					 "linux-scsi@vger.kernel.org\n",
 					 __FUNCTION__);
+			blk_dump_rq_flags(req, "foo");
 			BUG();
 		}
 		spin_lock(shost->host_lock);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 638cff41d436..10bc99c911fa 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -443,8 +443,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
 		SCpnt->cmnd[0] = READ_6;
 		SCpnt->sc_data_direction = DMA_FROM_DEVICE;
 	} else {
-		printk(KERN_ERR "sd: Unknown command %lx\n", rq->flags);
-/* overkill 	panic("Unknown sd command %lx\n", rq->flags); */
+		printk(KERN_ERR "sd: Unknown command %x\n", rq->cmd_flags);
 		return 0;
 	}
 
@@ -840,7 +839,7 @@ static int sd_issue_flush(struct device *dev, sector_t *error_sector)
 static void sd_prepare_flush(request_queue_t *q, struct request *rq)
 {
 	memset(rq->cmd, 0, sizeof(rq->cmd));
-	rq->flags |= REQ_BLOCK_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->timeout = SD_TIMEOUT;
 	rq->cmd[0] = SYNCHRONIZE_CACHE;
 	rq->cmd_len = 10;
diff --git a/drivers/scsi/sun3_NCR5380.c b/drivers/scsi/sun3_NCR5380.c
index 2f8073b73bf3..7f9bcef6adfa 100644
--- a/drivers/scsi/sun3_NCR5380.c
+++ b/drivers/scsi/sun3_NCR5380.c
@@ -2017,7 +2017,7 @@ static void NCR5380_information_transfer (struct Scsi_Host *instance)
 		if((count > SUN3_DMA_MINSIZE) && (sun3_dma_setup_done
 						  != cmd))
 		{
-			if(cmd->request->flags & REQ_CMD) {
+			if(blk_fs_request(cmd->request)) {
 				sun3scsi_dma_setup(d, count,
 						   rq_data_dir(cmd->request));
 				sun3_dma_setup_done = cmd;
diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c
index 837173415d4c..44a99aeb8180 100644
--- a/drivers/scsi/sun3_scsi.c
+++ b/drivers/scsi/sun3_scsi.c
@@ -524,7 +524,7 @@ static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
 static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd,
 				    int write_flag)
 {
-	if(cmd->request->flags & REQ_CMD)
+	if(blk_fs_request(cmd->request))
  		return wanted;
 	else
 		return 0;
diff --git a/drivers/scsi/sun3_scsi_vme.c b/drivers/scsi/sun3_scsi_vme.c
index 008a82ab8521..f5742b84b27a 100644
--- a/drivers/scsi/sun3_scsi_vme.c
+++ b/drivers/scsi/sun3_scsi_vme.c
@@ -458,7 +458,7 @@ static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
 static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd,
 				    int write_flag)
 {
-	if(cmd->request->flags & REQ_CMD)
+	if(blk_fs_request(cmd->request))
  		return wanted;
 	else
 		return 0;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cfde8b3ee919..b2a412cf468f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -120,6 +120,86 @@ struct request_list {
 	wait_queue_head_t wait[2];
 };
 
+/*
+ * request command types
+ */
+enum rq_cmd_type_bits {
+	REQ_TYPE_FS		= 1,	/* fs request */
+	REQ_TYPE_BLOCK_PC,		/* scsi command */
+	REQ_TYPE_SENSE,			/* sense request */
+	REQ_TYPE_PM_SUSPEND,		/* suspend request */
+	REQ_TYPE_PM_RESUME,		/* resume request */
+	REQ_TYPE_PM_SHUTDOWN,		/* shutdown request */
+	REQ_TYPE_FLUSH,			/* flush request */
+	REQ_TYPE_SPECIAL,		/* driver defined type */
+	REQ_TYPE_LINUX_BLOCK,		/* generic block layer message */
+	/*
+	 * for ATA/ATAPI devices. this really doesn't belong here, ide should
+	 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
+	 * private REQ_LB opcodes to differentiate what type of request this is
+	 */
+	REQ_TYPE_ATA_CMD,
+	REQ_TYPE_ATA_TASK,
+	REQ_TYPE_ATA_TASKFILE,
+};
+
+/*
+ * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
+ * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
+ * SCSI cdb.
+ *
+ * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need,
+ * typically to differentiate REQ_TYPE_SPECIAL requests.
+ *
+ */
+enum {
+	/*
+	 * just examples for now
+	 */
+	REQ_LB_OP_EJECT	= 0x40,		/* eject request */
+	REQ_LB_OP_FLUSH = 0x41,		/* flush device */
+};
+
+/*
+ * request type modified bits. first three bits match BIO_RW* bits, important
+ */
+enum rq_flag_bits {
+	__REQ_RW,		/* not set, read. set, write */
+	__REQ_FAILFAST,		/* no low level driver retries */
+	__REQ_SORTED,		/* elevator knows about this request */
+	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
+	__REQ_HARDBARRIER,	/* may not be passed by drive either */
+	__REQ_FUA,		/* forced unit access */
+	__REQ_NOMERGE,		/* don't touch this for merging */
+	__REQ_STARTED,		/* drive already may have started this one */
+	__REQ_DONTPREP,		/* don't call prep for this one */
+	__REQ_QUEUED,		/* uses queueing */
+	__REQ_ELVPRIV,		/* elevator private data attached */
+	__REQ_FAILED,		/* set if the request failed */
+	__REQ_QUIET,		/* don't worry about errors */
+	__REQ_PREEMPT,		/* set for "ide_preempt" requests */
+	__REQ_ORDERED_COLOR,	/* is before or after barrier */
+	__REQ_RW_SYNC,		/* request is sync (O_DIRECT) */
+	__REQ_NR_BITS,		/* stops here */
+};
+
+#define REQ_RW		(1 << __REQ_RW)
+#define REQ_FAILFAST	(1 << __REQ_FAILFAST)
+#define REQ_SORTED	(1 << __REQ_SORTED)
+#define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
+#define REQ_HARDBARRIER	(1 << __REQ_HARDBARRIER)
+#define REQ_FUA		(1 << __REQ_FUA)
+#define REQ_NOMERGE	(1 << __REQ_NOMERGE)
+#define REQ_STARTED	(1 << __REQ_STARTED)
+#define REQ_DONTPREP	(1 << __REQ_DONTPREP)
+#define REQ_QUEUED	(1 << __REQ_QUEUED)
+#define REQ_ELVPRIV	(1 << __REQ_ELVPRIV)
+#define REQ_FAILED	(1 << __REQ_FAILED)
+#define REQ_QUIET	(1 << __REQ_QUIET)
+#define REQ_PREEMPT	(1 << __REQ_PREEMPT)
+#define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
+#define REQ_RW_SYNC	(1 << __REQ_RW_SYNC)
+
 #define BLK_MAX_CDB	16
 
 /*
@@ -129,7 +209,8 @@ struct request {
 	struct list_head queuelist;
 	struct list_head donelist;
 
-	unsigned long flags;		/* see REQ_ bits below */
+	unsigned int cmd_flags;
+	enum rq_cmd_type_bits cmd_type;
 
 	/* Maintain bio traversal state for part by part I/O submission.
 	 * hard_* are block layer internals, no driver should touch them!
@@ -202,73 +283,7 @@ struct request {
 };
 
 /*
- * first three bits match BIO_RW* bits, important
- */
-enum rq_flag_bits {
-	__REQ_RW,		/* not set, read. set, write */
-	__REQ_FAILFAST,		/* no low level driver retries */
-	__REQ_SORTED,		/* elevator knows about this request */
-	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
-	__REQ_HARDBARRIER,	/* may not be passed by drive either */
-	__REQ_FUA,		/* forced unit access */
-	__REQ_CMD,		/* is a regular fs rw request */
-	__REQ_NOMERGE,		/* don't touch this for merging */
-	__REQ_STARTED,		/* drive already may have started this one */
-	__REQ_DONTPREP,		/* don't call prep for this one */
-	__REQ_QUEUED,		/* uses queueing */
-	__REQ_ELVPRIV,		/* elevator private data attached */
-	/*
-	 * for ATA/ATAPI devices
-	 */
-	__REQ_PC,		/* packet command (special) */
-	__REQ_BLOCK_PC,		/* queued down pc from block layer */
-	__REQ_SENSE,		/* sense retrival */
-
-	__REQ_FAILED,		/* set if the request failed */
-	__REQ_QUIET,		/* don't worry about errors */
-	__REQ_SPECIAL,		/* driver suplied command */
-	__REQ_DRIVE_CMD,
-	__REQ_DRIVE_TASK,
-	__REQ_DRIVE_TASKFILE,
-	__REQ_PREEMPT,		/* set for "ide_preempt" requests */
-	__REQ_PM_SUSPEND,	/* suspend request */
-	__REQ_PM_RESUME,	/* resume request */
-	__REQ_PM_SHUTDOWN,	/* shutdown request */
-	__REQ_ORDERED_COLOR,	/* is before or after barrier */
-	__REQ_RW_SYNC,		/* request is sync (O_DIRECT) */
-	__REQ_NR_BITS,		/* stops here */
-};
-
-#define REQ_RW		(1 << __REQ_RW)
-#define REQ_FAILFAST	(1 << __REQ_FAILFAST)
-#define REQ_SORTED	(1 << __REQ_SORTED)
-#define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
-#define REQ_HARDBARRIER	(1 << __REQ_HARDBARRIER)
-#define REQ_FUA		(1 << __REQ_FUA)
-#define REQ_CMD		(1 << __REQ_CMD)
-#define REQ_NOMERGE	(1 << __REQ_NOMERGE)
-#define REQ_STARTED	(1 << __REQ_STARTED)
-#define REQ_DONTPREP	(1 << __REQ_DONTPREP)
-#define REQ_QUEUED	(1 << __REQ_QUEUED)
-#define REQ_ELVPRIV	(1 << __REQ_ELVPRIV)
-#define REQ_PC		(1 << __REQ_PC)
-#define REQ_BLOCK_PC	(1 << __REQ_BLOCK_PC)
-#define REQ_SENSE	(1 << __REQ_SENSE)
-#define REQ_FAILED	(1 << __REQ_FAILED)
-#define REQ_QUIET	(1 << __REQ_QUIET)
-#define REQ_SPECIAL	(1 << __REQ_SPECIAL)
-#define REQ_DRIVE_CMD	(1 << __REQ_DRIVE_CMD)
-#define REQ_DRIVE_TASK	(1 << __REQ_DRIVE_TASK)
-#define REQ_DRIVE_TASKFILE	(1 << __REQ_DRIVE_TASKFILE)
-#define REQ_PREEMPT	(1 << __REQ_PREEMPT)
-#define REQ_PM_SUSPEND	(1 << __REQ_PM_SUSPEND)
-#define REQ_PM_RESUME	(1 << __REQ_PM_RESUME)
-#define REQ_PM_SHUTDOWN	(1 << __REQ_PM_SHUTDOWN)
-#define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
-#define REQ_RW_SYNC	(1 << __REQ_RW_SYNC)
-
-/*
- * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
+ * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
  * requests. Some step values could eventually be made generic.
  */
 struct request_pm_state
@@ -490,25 +505,28 @@ enum {
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_flushing(q)	((q)->ordseq)
 
-#define blk_fs_request(rq)	((rq)->flags & REQ_CMD)
-#define blk_pc_request(rq)	((rq)->flags & REQ_BLOCK_PC)
-#define blk_noretry_request(rq)	((rq)->flags & REQ_FAILFAST)
-#define blk_rq_started(rq)	((rq)->flags & REQ_STARTED)
+#define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
+#define blk_pc_request(rq)	((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
+#define blk_special_request(rq)	((rq)->cmd_type == REQ_TYPE_SPECIAL)
+#define blk_sense_request(rq)	((rq)->cmd_type == REQ_TYPE_SENSE)
+
+#define blk_noretry_request(rq)	((rq)->cmd_flags & REQ_FAILFAST)
+#define blk_rq_started(rq)	((rq)->cmd_flags & REQ_STARTED)
 
 #define blk_account_rq(rq)	(blk_rq_started(rq) && blk_fs_request(rq))
 
-#define blk_pm_suspend_request(rq)	((rq)->flags & REQ_PM_SUSPEND)
-#define blk_pm_resume_request(rq)	((rq)->flags & REQ_PM_RESUME)
+#define blk_pm_suspend_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
+#define blk_pm_resume_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_RESUME)
 #define blk_pm_request(rq)	\
-	((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME))
+	(blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
 
-#define blk_sorted_rq(rq)	((rq)->flags & REQ_SORTED)
-#define blk_barrier_rq(rq)	((rq)->flags & REQ_HARDBARRIER)
-#define blk_fua_rq(rq)		((rq)->flags & REQ_FUA)
+#define blk_sorted_rq(rq)	((rq)->cmd_flags & REQ_SORTED)
+#define blk_barrier_rq(rq)	((rq)->cmd_flags & REQ_HARDBARRIER)
+#define blk_fua_rq(rq)		((rq)->cmd_flags & REQ_FUA)
 
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
-#define rq_data_dir(rq)		((rq)->flags & 1)
+#define rq_data_dir(rq)		((rq)->cmd_flags & 1)
 
 static inline int blk_queue_full(struct request_queue *q, int rw)
 {
@@ -541,7 +559,7 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw)
 #define RQ_NOMERGE_FLAGS	\
 	(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
 #define rq_mergeable(rq)	\
-	(!((rq)->flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
+	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
 
 /*
  * noop, requests are automagically marked as active/inactive by I/O
@@ -737,7 +755,7 @@ extern void blk_put_queue(request_queue_t *);
  */
 #define blk_queue_tag_depth(q)		((q)->queue_tags->busy)
 #define blk_queue_tag_queue(q)		((q)->queue_tags->busy < (q)->queue_tags->max_depth)
-#define blk_rq_tagged(rq)		((rq)->flags & REQ_QUEUED)
+#define blk_rq_tagged(rq)		((rq)->cmd_flags & REQ_QUEUED)
 extern int blk_queue_start_tag(request_queue_t *, struct request *);
 extern struct request *blk_queue_find_tag(request_queue_t *, int);
 extern void blk_queue_end_tag(request_queue_t *, struct request *);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 7520cc1ff9e2..ea48eb1b3fd3 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -148,7 +148,7 @@ static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
 				    u32 what)
 {
 	struct blk_trace *bt = q->blk_trace;
-	int rw = rq->flags & 0x03;
+	int rw = rq->cmd_flags & 0x03;
 
 	if (likely(!bt))
 		return;
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index d04d05adfa9b..bbf66219b769 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -100,7 +100,7 @@ static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg)
 	struct scsi_device *sdev = cmd->device;
 
         if (blk_rq_tagged(req)) {
-		if (sdev->ordered_tags && req->flags & REQ_HARDBARRIER)
+		if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER)
         	        *msg++ = MSG_ORDERED_TAG;
         	else
         	        *msg++ = MSG_SIMPLE_TAG;
-- 
cgit v1.2.3


From 9817064b68fef7e4580c6df1ea597e106b9ff88b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Fri, 28 Jul 2006 09:23:08 +0200
Subject: [PATCH] elevator: move the backmerging logic into the elevator core

Right now, every IO scheduler implements its own backmerging (except for
noop, which does no merging). That results in duplicated code for
essentially the same operation, which is never a good thing. This patch
moves the backmerging out of the io schedulers and into the elevator
core. We save 1.6kb of text and as a bonus get backmerging for noop as
well. Win-win!

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/as-iosched.c       | 139 +-------------------------------------------
 block/cfq-iosched.c      |  86 +--------------------------
 block/deadline-iosched.c | 128 +----------------------------------------
 block/elevator.c         | 147 ++++++++++++++++++++++++++++++++++++++++++-----
 block/ll_rw_blk.c        |   2 +
 include/linux/blkdev.h   |  17 +-----
 include/linux/elevator.h |   2 +
 7 files changed, 146 insertions(+), 375 deletions(-)

(limited to 'include/linux')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index ad1cc4077819..6db494333c3a 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -14,7 +14,6 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
-#include <linux/hash.h>
 #include <linux/rbtree.h>
 #include <linux/interrupt.h>
 
@@ -95,7 +94,6 @@ struct as_data {
 
 	struct as_rq *next_arq[2];	/* next in sort order */
 	sector_t last_sector[2];	/* last REQ_SYNC & REQ_ASYNC sectors */
-	struct hlist_head *hash;	/* request hash */
 
 	unsigned long exit_prob;	/* probability a task will exit while
 					   being waited on */
@@ -161,11 +159,6 @@ struct as_rq {
 
 	struct io_context *io_context;	/* The submitting task */
 
-	/*
-	 * request hash, key is the ending offset (for back merge lookup)
-	 */
-	struct hlist_node hash;
-
 	/*
 	 * expire fifo
 	 */
@@ -272,77 +265,6 @@ static void as_put_io_context(struct as_rq *arq)
 	put_io_context(arq->io_context);
 }
 
-/*
- * the back merge hash support functions
- */
-static const int as_hash_shift = 6;
-#define AS_HASH_BLOCK(sec)	((sec) >> 3)
-#define AS_HASH_FN(sec)		(hash_long(AS_HASH_BLOCK((sec)), as_hash_shift))
-#define AS_HASH_ENTRIES		(1 << as_hash_shift)
-#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
-
-static inline void __as_del_arq_hash(struct as_rq *arq)
-{
-	hlist_del_init(&arq->hash);
-}
-
-static inline void as_del_arq_hash(struct as_rq *arq)
-{
-	if (!hlist_unhashed(&arq->hash))
-		__as_del_arq_hash(arq);
-}
-
-static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
-{
-	struct request *rq = arq->request;
-
-	BUG_ON(!hlist_unhashed(&arq->hash));
-
-	hlist_add_head(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]);
-}
-
-/*
- * move hot entry to front of chain
- */
-static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq)
-{
-	struct request *rq = arq->request;
-	struct hlist_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))];
-
-	if (hlist_unhashed(&arq->hash)) {
-		WARN_ON(1);
-		return;
-	}
-
-	if (&arq->hash != head->first) {
-		hlist_del(&arq->hash);
-		hlist_add_head(&arq->hash, head);
-	}
-}
-
-static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
-{
-	struct hlist_head *hash_list = &ad->hash[AS_HASH_FN(offset)];
-	struct hlist_node *entry, *next;
-	struct as_rq *arq;
-
-	hlist_for_each_entry_safe(arq, entry, next, hash_list, hash) {
-		struct request *__rq = arq->request;
-
-		BUG_ON(hlist_unhashed(&arq->hash));
-
-		if (!rq_mergeable(__rq)) {
-			as_del_arq_hash(arq);
-			continue;
-		}
-
-		if (rq_hash_key(__rq) == offset)
-			return __rq;
-	}
-
-	return NULL;
-}
-
 /*
  * rb tree support functions
  */
@@ -1060,7 +982,6 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
 		ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
 
 	list_del_init(&arq->fifo);
-	as_del_arq_hash(arq);
 	as_del_arq_rb(ad, arq);
 }
 
@@ -1349,8 +1270,6 @@ static void as_add_request(request_queue_t *q, struct request *rq)
 	}
 
 	as_add_arq_rb(ad, arq);
-	if (rq_mergeable(arq->request))
-		as_add_arq_hash(ad, arq);
 
 	/*
 	 * set expire time (only used for reads) and add to fifo list
@@ -1428,42 +1347,17 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	struct as_data *ad = q->elevator->elevator_data;
 	sector_t rb_key = bio->bi_sector + bio_sectors(bio);
 	struct request *__rq;
-	int ret;
-
-	/*
-	 * see if the merge hash can satisfy a back merge
-	 */
-	__rq = as_find_arq_hash(ad, bio->bi_sector);
-	if (__rq) {
-		BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
-		if (elv_rq_merge_ok(__rq, bio)) {
-			ret = ELEVATOR_BACK_MERGE;
-			goto out;
-		}
-	}
 
 	/*
 	 * check for front merge
 	 */
 	__rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio));
-	if (__rq) {
-		BUG_ON(rb_key != rq_rb_key(__rq));
-
-		if (elv_rq_merge_ok(__rq, bio)) {
-			ret = ELEVATOR_FRONT_MERGE;
-			goto out;
-		}
+	if (__rq && elv_rq_merge_ok(__rq, bio)) {
+		*req = __rq;
+		return ELEVATOR_FRONT_MERGE;
 	}
 
 	return ELEVATOR_NO_MERGE;
-out:
-	if (ret) {
-		if (rq_mergeable(__rq))
-			as_hot_arq_hash(ad, RQ_DATA(__rq));
-	}
-	*req = __rq;
-	return ret;
 }
 
 static void as_merged_request(request_queue_t *q, struct request *req)
@@ -1471,12 +1365,6 @@ static void as_merged_request(request_queue_t *q, struct request *req)
 	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = RQ_DATA(req);
 
-	/*
-	 * hash always needs to be repositioned, key is end sector
-	 */
-	as_del_arq_hash(arq);
-	as_add_arq_hash(ad, arq);
-
 	/*
 	 * if the merge was a front merge, we need to reposition request
 	 */
@@ -1501,13 +1389,6 @@ static void as_merged_requests(request_queue_t *q, struct request *req,
 	BUG_ON(!arq);
 	BUG_ON(!anext);
 
-	/*
-	 * reposition arq (this is the merged request) in hash, and in rbtree
-	 * in case of a front merge
-	 */
-	as_del_arq_hash(arq);
-	as_add_arq_hash(ad, arq);
-
 	if (rq_rb_key(req) != arq->rb_key) {
 		as_del_arq_rb(ad, arq);
 		as_add_arq_rb(ad, arq);
@@ -1591,7 +1472,6 @@ static int as_set_request(request_queue_t *q, struct request *rq,
 		arq->request = rq;
 		arq->state = AS_RQ_PRESCHED;
 		arq->io_context = NULL;
-		INIT_HLIST_NODE(&arq->hash);
 		INIT_LIST_HEAD(&arq->fifo);
 		rq->elevator_private = arq;
 		return 0;
@@ -1628,7 +1508,6 @@ static void as_exit_queue(elevator_t *e)
 
 	mempool_destroy(ad->arq_pool);
 	put_io_context(ad->io_context);
-	kfree(ad->hash);
 	kfree(ad);
 }
 
@@ -1639,7 +1518,6 @@ static void as_exit_queue(elevator_t *e)
 static void *as_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct as_data *ad;
-	int i;
 
 	if (!arq_pool)
 		return NULL;
@@ -1651,17 +1529,9 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e)
 
 	ad->q = q; /* Identify what queue the data belongs to */
 
-	ad->hash = kmalloc_node(sizeof(struct hlist_head)*AS_HASH_ENTRIES,
-				GFP_KERNEL, q->node);
-	if (!ad->hash) {
-		kfree(ad);
-		return NULL;
-	}
-
 	ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
 				mempool_free_slab, arq_pool, q->node);
 	if (!ad->arq_pool) {
-		kfree(ad->hash);
 		kfree(ad);
 		return NULL;
 	}
@@ -1672,9 +1542,6 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e)
 	init_timer(&ad->antic_timer);
 	INIT_WORK(&ad->antic_work, as_work_handler, q);
 
-	for (i = 0; i < AS_HASH_ENTRIES; i++)
-		INIT_HLIST_HEAD(&ad->hash[i]);
-
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]);
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
 	ad->sort_list[REQ_SYNC] = RB_ROOT;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3a3aee08ec5f..1b803c0c90f1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -41,16 +41,6 @@ static DEFINE_SPINLOCK(cfq_exit_lock);
 #define CFQ_QHASH_ENTRIES	(1 << CFQ_QHASH_SHIFT)
 #define list_entry_qhash(entry)	hlist_entry((entry), struct cfq_queue, cfq_hash)
 
-/*
- * for the hash of crq inside the cfqq
- */
-#define CFQ_MHASH_SHIFT		6
-#define CFQ_MHASH_BLOCK(sec)	((sec) >> 3)
-#define CFQ_MHASH_ENTRIES	(1 << CFQ_MHASH_SHIFT)
-#define CFQ_MHASH_FN(sec)	hash_long(CFQ_MHASH_BLOCK(sec), CFQ_MHASH_SHIFT)
-#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
-#define list_entry_hash(ptr)	hlist_entry((ptr), struct cfq_rq, hash)
-
 #define list_entry_cfqq(ptr)	list_entry((ptr), struct cfq_queue, cfq_list)
 #define list_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
 
@@ -112,11 +102,6 @@ struct cfq_data {
 	 */
 	struct hlist_head *cfq_hash;
 
-	/*
-	 * global crq hash for all queues
-	 */
-	struct hlist_head *crq_hash;
-
 	mempool_t *crq_pool;
 
 	int rq_in_driver;
@@ -203,7 +188,6 @@ struct cfq_rq {
 	struct rb_node rb_node;
 	sector_t rb_key;
 	struct request *request;
-	struct hlist_node hash;
 
 	struct cfq_queue *cfq_queue;
 	struct cfq_io_context *io_context;
@@ -271,42 +255,6 @@ static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsi
 static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
 static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, gfp_t gfp_mask);
 
-/*
- * lots of deadline iosched dupes, can be abstracted later...
- */
-static inline void cfq_del_crq_hash(struct cfq_rq *crq)
-{
-	hlist_del_init(&crq->hash);
-}
-
-static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
-{
-	const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
-
-	hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
-}
-
-static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
-{
-	struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
-	struct hlist_node *entry, *next;
-
-	hlist_for_each_safe(entry, next, hash_list) {
-		struct cfq_rq *crq = list_entry_hash(entry);
-		struct request *__rq = crq->request;
-
-		if (!rq_mergeable(__rq)) {
-			cfq_del_crq_hash(crq);
-			continue;
-		}
-
-		if (rq_hash_key(__rq) == offset)
-			return __rq;
-	}
-
-	return NULL;
-}
-
 /*
  * scheduler run of queue, if there are requests pending and no one in the
  * driver that will restart queueing
@@ -677,7 +625,6 @@ static void cfq_remove_request(struct request *rq)
 
 	list_del_init(&rq->queuelist);
 	cfq_del_crq_rb(crq);
-	cfq_del_crq_hash(crq);
 }
 
 static int
@@ -685,34 +632,20 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct request *__rq;
-	int ret;
-
-	__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
-	if (__rq && elv_rq_merge_ok(__rq, bio)) {
-		ret = ELEVATOR_BACK_MERGE;
-		goto out;
-	}
 
 	__rq = cfq_find_rq_fmerge(cfqd, bio);
 	if (__rq && elv_rq_merge_ok(__rq, bio)) {
-		ret = ELEVATOR_FRONT_MERGE;
-		goto out;
+		*req = __rq;
+		return ELEVATOR_FRONT_MERGE;
 	}
 
 	return ELEVATOR_NO_MERGE;
-out:
-	*req = __rq;
-	return ret;
 }
 
 static void cfq_merged_request(request_queue_t *q, struct request *req)
 {
-	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_rq *crq = RQ_DATA(req);
 
-	cfq_del_crq_hash(crq);
-	cfq_add_crq_hash(cfqd, crq);
-
 	if (rq_rb_key(req) != crq->rb_key) {
 		struct cfq_queue *cfqq = crq->cfq_queue;
 
@@ -1825,9 +1758,6 @@ static void cfq_insert_request(request_queue_t *q, struct request *rq)
 
 	list_add_tail(&rq->queuelist, &cfqq->fifo);
 
-	if (rq_mergeable(rq))
-		cfq_add_crq_hash(cfqd, crq);
-
 	cfq_crq_enqueued(cfqd, cfqq, crq);
 }
 
@@ -2055,7 +1985,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 		RB_CLEAR_NODE(&crq->rb_node);
 		crq->rb_key = 0;
 		crq->request = rq;
-		INIT_HLIST_NODE(&crq->hash);
 		crq->cfq_queue = cfqq;
 		crq->io_context = cic;
 
@@ -2221,7 +2150,6 @@ static void cfq_exit_queue(elevator_t *e)
 	cfq_shutdown_timer_wq(cfqd);
 
 	mempool_destroy(cfqd->crq_pool);
-	kfree(cfqd->crq_hash);
 	kfree(cfqd->cfq_hash);
 	kfree(cfqd);
 }
@@ -2246,20 +2174,14 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 	INIT_LIST_HEAD(&cfqd->empty_list);
 	INIT_LIST_HEAD(&cfqd->cic_list);
 
-	cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
-	if (!cfqd->crq_hash)
-		goto out_crqhash;
-
 	cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
 	if (!cfqd->cfq_hash)
-		goto out_cfqhash;
+		goto out_crqhash;
 
 	cfqd->crq_pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, crq_pool);
 	if (!cfqd->crq_pool)
 		goto out_crqpool;
 
-	for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
-		INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
 	for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
 		INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
 
@@ -2289,8 +2211,6 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 	return cfqd;
 out_crqpool:
 	kfree(cfqd->cfq_hash);
-out_cfqhash:
-	kfree(cfqd->crq_hash);
 out_crqhash:
 	kfree(cfqd);
 	return NULL;
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index c7ca9f0b6498..b66e820f544d 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -12,7 +12,6 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
-#include <linux/hash.h>
 #include <linux/rbtree.h>
 
 /*
@@ -24,13 +23,6 @@ static const int writes_starved = 2;    /* max times reads can starve a write */
 static const int fifo_batch = 16;       /* # of sequential requests treated as one
 				     by the above parameters. For throughput. */
 
-static const int deadline_hash_shift = 5;
-#define DL_HASH_BLOCK(sec)	((sec) >> 3)
-#define DL_HASH_FN(sec)		(hash_long(DL_HASH_BLOCK((sec)), deadline_hash_shift))
-#define DL_HASH_ENTRIES		(1 << deadline_hash_shift)
-#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
-#define ON_HASH(drq)		(!hlist_unhashed(&(drq)->hash))
-
 struct deadline_data {
 	/*
 	 * run time data
@@ -46,7 +38,6 @@ struct deadline_data {
 	 * next in sort order. read, write or both are NULL
 	 */
 	struct deadline_rq *next_drq[2];
-	struct hlist_head *hash;	/* request hash */
 	unsigned int batching;		/* number of sequential requests made */
 	sector_t last_sector;		/* head position */
 	unsigned int starved;		/* times reads have starved writes */
@@ -74,11 +65,6 @@ struct deadline_rq {
 
 	struct request *request;
 
-	/*
-	 * request hash, key is the ending offset (for back merge lookup)
-	 */
-	struct hlist_node hash;
-
 	/*
 	 * expire fifo
 	 */
@@ -92,69 +78,6 @@ static kmem_cache_t *drq_pool;
 
 #define RQ_DATA(rq)	((struct deadline_rq *) (rq)->elevator_private)
 
-/*
- * the back merge hash support functions
- */
-static inline void __deadline_del_drq_hash(struct deadline_rq *drq)
-{
-	hlist_del_init(&drq->hash);
-}
-
-static inline void deadline_del_drq_hash(struct deadline_rq *drq)
-{
-	if (ON_HASH(drq))
-		__deadline_del_drq_hash(drq);
-}
-
-static inline void
-deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
-{
-	struct request *rq = drq->request;
-
-	BUG_ON(ON_HASH(drq));
-
-	hlist_add_head(&drq->hash, &dd->hash[DL_HASH_FN(rq_hash_key(rq))]);
-}
-
-/*
- * move hot entry to front of chain
- */
-static inline void
-deadline_hot_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
-{
-	struct request *rq = drq->request;
-	struct hlist_head *head = &dd->hash[DL_HASH_FN(rq_hash_key(rq))];
-
-	if (ON_HASH(drq) && &drq->hash != head->first) {
-		hlist_del(&drq->hash);
-		hlist_add_head(&drq->hash, head);
-	}
-}
-
-static struct request *
-deadline_find_drq_hash(struct deadline_data *dd, sector_t offset)
-{
-	struct hlist_head *hash_list = &dd->hash[DL_HASH_FN(offset)];
-	struct hlist_node *entry, *next;
-	struct deadline_rq *drq;
-
-	hlist_for_each_entry_safe(drq, entry, next, hash_list, hash) {
-		struct request *__rq = drq->request;
-
-		BUG_ON(!ON_HASH(drq));
-
-		if (!rq_mergeable(__rq)) {
-			__deadline_del_drq_hash(drq);
-			continue;
-		}
-
-		if (rq_hash_key(__rq) == offset)
-			return __rq;
-	}
-
-	return NULL;
-}
-
 /*
  * rb tree support functions
  */
@@ -267,22 +190,19 @@ deadline_add_request(struct request_queue *q, struct request *rq)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq = RQ_DATA(rq);
-
 	const int data_dir = rq_data_dir(drq->request);
 
 	deadline_add_drq_rb(dd, drq);
+
 	/*
 	 * set expire time (only used for reads) and add to fifo list
 	 */
 	drq->expires = jiffies + dd->fifo_expire[data_dir];
 	list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
-
-	if (rq_mergeable(rq))
-		deadline_add_drq_hash(dd, drq);
 }
 
 /*
- * remove rq from rbtree, fifo, and hash
+ * remove rq from rbtree and fifo.
  */
 static void deadline_remove_request(request_queue_t *q, struct request *rq)
 {
@@ -291,7 +211,6 @@ static void deadline_remove_request(request_queue_t *q, struct request *rq)
 
 	list_del_init(&drq->fifo);
 	deadline_del_drq_rb(dd, drq);
-	deadline_del_drq_hash(drq);
 }
 
 static int
@@ -301,19 +220,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	struct request *__rq;
 	int ret;
 
-	/*
-	 * see if the merge hash can satisfy a back merge
-	 */
-	__rq = deadline_find_drq_hash(dd, bio->bi_sector);
-	if (__rq) {
-		BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
-		if (elv_rq_merge_ok(__rq, bio)) {
-			ret = ELEVATOR_BACK_MERGE;
-			goto out;
-		}
-	}
-
 	/*
 	 * check for front merge
 	 */
@@ -333,8 +239,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	if (ret)
-		deadline_hot_drq_hash(dd, RQ_DATA(__rq));
 	*req = __rq;
 	return ret;
 }
@@ -344,12 +248,6 @@ static void deadline_merged_request(request_queue_t *q, struct request *req)
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq = RQ_DATA(req);
 
-	/*
-	 * hash always needs to be repositioned, key is end sector
-	 */
-	deadline_del_drq_hash(drq);
-	deadline_add_drq_hash(dd, drq);
-
 	/*
 	 * if the merge was a front merge, we need to reposition request
 	 */
@@ -370,13 +268,6 @@ deadline_merged_requests(request_queue_t *q, struct request *req,
 	BUG_ON(!drq);
 	BUG_ON(!dnext);
 
-	/*
-	 * reposition drq (this is the merged request) in hash, and in rbtree
-	 * in case of a front merge
-	 */
-	deadline_del_drq_hash(drq);
-	deadline_add_drq_hash(dd, drq);
-
 	if (rq_rb_key(req) != drq->rb_key) {
 		deadline_del_drq_rb(dd, drq);
 		deadline_add_drq_rb(dd, drq);
@@ -594,7 +485,6 @@ static void deadline_exit_queue(elevator_t *e)
 	BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
 
 	mempool_destroy(dd->drq_pool);
-	kfree(dd->hash);
 	kfree(dd);
 }
 
@@ -605,7 +495,6 @@ static void deadline_exit_queue(elevator_t *e)
 static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct deadline_data *dd;
-	int i;
 
 	if (!drq_pool)
 		return NULL;
@@ -615,24 +504,13 @@ static void *deadline_init_queue(request_queue_t *q, elevator_t *e)
 		return NULL;
 	memset(dd, 0, sizeof(*dd));
 
-	dd->hash = kmalloc_node(sizeof(struct hlist_head)*DL_HASH_ENTRIES,
-				GFP_KERNEL, q->node);
-	if (!dd->hash) {
-		kfree(dd);
-		return NULL;
-	}
-
 	dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
 					mempool_free_slab, drq_pool, q->node);
 	if (!dd->drq_pool) {
-		kfree(dd->hash);
 		kfree(dd);
 		return NULL;
 	}
 
-	for (i = 0; i < DL_HASH_ENTRIES; i++)
-		INIT_HLIST_HEAD(&dd->hash[i]);
-
 	INIT_LIST_HEAD(&dd->fifo_list[READ]);
 	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
 	dd->sort_list[READ] = RB_ROOT;
@@ -667,8 +545,6 @@ deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 		RB_CLEAR_NODE(&drq->rb_node);
 		drq->request = rq;
 
-		INIT_HLIST_NODE(&drq->hash);
-
 		INIT_LIST_HEAD(&drq->fifo);
 
 		rq->elevator_private = drq;
diff --git a/block/elevator.c b/block/elevator.c
index 4ac97b642042..cff1102dac9d 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -33,12 +33,23 @@
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
+#include <linux/hash.h>
 
 #include <asm/uaccess.h>
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
 
+/*
+ * Merge hash stuff.
+ */
+static const int elv_hash_shift = 6;
+#define ELV_HASH_BLOCK(sec)	((sec) >> 3)
+#define ELV_HASH_FN(sec)	(hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
+#define ELV_HASH_ENTRIES	(1 << elv_hash_shift)
+#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
+#define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
+
 /*
  * can we safely merge with this request?
  */
@@ -153,25 +164,41 @@ static struct kobj_type elv_ktype;
 
 static elevator_t *elevator_alloc(struct elevator_type *e)
 {
-	elevator_t *eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
-	if (eq) {
-		memset(eq, 0, sizeof(*eq));
-		eq->ops = &e->ops;
-		eq->elevator_type = e;
-		kobject_init(&eq->kobj);
-		snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
-		eq->kobj.ktype = &elv_ktype;
-		mutex_init(&eq->sysfs_lock);
-	} else {
-		elevator_put(e);
-	}
+	elevator_t *eq;
+	int i;
+
+	eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
+	if (unlikely(!eq))
+		goto err;
+
+	memset(eq, 0, sizeof(*eq));
+	eq->ops = &e->ops;
+	eq->elevator_type = e;
+	kobject_init(&eq->kobj);
+	snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");
+	eq->kobj.ktype = &elv_ktype;
+	mutex_init(&eq->sysfs_lock);
+
+	eq->hash = kmalloc(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, GFP_KERNEL);
+	if (!eq->hash)
+		goto err;
+
+	for (i = 0; i < ELV_HASH_ENTRIES; i++)
+		INIT_HLIST_HEAD(&eq->hash[i]);
+
 	return eq;
+err:
+	kfree(eq);
+	elevator_put(e);
+	return NULL;
 }
 
 static void elevator_release(struct kobject *kobj)
 {
 	elevator_t *e = container_of(kobj, elevator_t, kobj);
+
 	elevator_put(e->elevator_type);
+	kfree(e->hash);
 	kfree(e);
 }
 
@@ -223,6 +250,53 @@ void elevator_exit(elevator_t *e)
 	kobject_put(&e->kobj);
 }
 
+static inline void __elv_rqhash_del(struct request *rq)
+{
+	hlist_del_init(&rq->hash);
+}
+
+static void elv_rqhash_del(request_queue_t *q, struct request *rq)
+{
+	if (ELV_ON_HASH(rq))
+		__elv_rqhash_del(rq);
+}
+
+static void elv_rqhash_add(request_queue_t *q, struct request *rq)
+{
+	elevator_t *e = q->elevator;
+
+	BUG_ON(ELV_ON_HASH(rq));
+	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
+}
+
+static void elv_rqhash_reposition(request_queue_t *q, struct request *rq)
+{
+	__elv_rqhash_del(rq);
+	elv_rqhash_add(q, rq);
+}
+
+static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
+{
+	elevator_t *e = q->elevator;
+	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
+	struct hlist_node *entry, *next;
+	struct request *rq;
+
+	hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
+		BUG_ON(!ELV_ON_HASH(rq));
+
+		if (unlikely(!rq_mergeable(rq))) {
+			__elv_rqhash_del(rq);
+			continue;
+		}
+
+		if (rq_hash_key(rq) == offset)
+			return rq;
+	}
+
+	return NULL;
+}
+
 /*
  * Insert rq into dispatch queue of q.  Queue lock must be held on
  * entry.  If sort != 0, rq is sort-inserted; otherwise, rq will be
@@ -235,6 +309,9 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 
 	if (q->last_merge == rq)
 		q->last_merge = NULL;
+
+	elv_rqhash_del(q, rq);
+
 	q->nr_sorted--;
 
 	boundary = q->end_sector;
@@ -258,11 +335,32 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 	list_add(&rq->queuelist, entry);
 }
 
+/*
+ * This should be in elevator.h, but that requires pulling in rq and q
+ */
+void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
+{
+	if (q->last_merge == rq)
+		q->last_merge = NULL;
+
+	elv_rqhash_del(q, rq);
+
+	q->nr_sorted--;
+
+	q->end_sector = rq_end_sector(rq);
+	q->boundary_rq = rq;
+	list_add_tail(&rq->queuelist, &q->queue_head);
+}
+
 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
 	elevator_t *e = q->elevator;
+	struct request *__rq;
 	int ret;
 
+	/*
+	 * First try one-hit cache.
+	 */
 	if (q->last_merge) {
 		ret = elv_try_merge(q->last_merge, bio);
 		if (ret != ELEVATOR_NO_MERGE) {
@@ -271,6 +369,15 @@ int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 		}
 	}
 
+	/*
+	 * See if our hash lookup can find a potential backmerge.
+	 */
+	__rq = elv_rqhash_find(q, bio->bi_sector);
+	if (__rq && elv_rq_merge_ok(__rq, bio)) {
+		*req = __rq;
+		return ELEVATOR_BACK_MERGE;
+	}
+
 	if (e->ops->elevator_merge_fn)
 		return e->ops->elevator_merge_fn(q, req, bio);
 
@@ -284,6 +391,8 @@ void elv_merged_request(request_queue_t *q, struct request *rq)
 	if (e->ops->elevator_merged_fn)
 		e->ops->elevator_merged_fn(q, rq);
 
+	elv_rqhash_reposition(q, rq);
+
 	q->last_merge = rq;
 }
 
@@ -294,8 +403,11 @@ void elv_merge_requests(request_queue_t *q, struct request *rq,
 
 	if (e->ops->elevator_merge_req_fn)
 		e->ops->elevator_merge_req_fn(q, rq, next);
-	q->nr_sorted--;
 
+	elv_rqhash_reposition(q, rq);
+	elv_rqhash_del(q, next);
+
+	q->nr_sorted--;
 	q->last_merge = rq;
 }
 
@@ -371,8 +483,12 @@ void elv_insert(request_queue_t *q, struct request *rq, int where)
 		BUG_ON(!blk_fs_request(rq));
 		rq->cmd_flags |= REQ_SORTED;
 		q->nr_sorted++;
-		if (q->last_merge == NULL && rq_mergeable(rq))
-			q->last_merge = rq;
+		if (rq_mergeable(rq)) {
+			elv_rqhash_add(q, rq);
+			if (!q->last_merge)
+				q->last_merge = rq;
+		}
+
 		/*
 		 * Some ioscheds (cfq) run q->request_fn directly, so
 		 * rq cannot be accessed after calling
@@ -557,6 +673,7 @@ struct request *elv_next_request(request_queue_t *q)
 void elv_dequeue_request(request_queue_t *q, struct request *rq)
 {
 	BUG_ON(list_empty(&rq->queuelist));
+	BUG_ON(ELV_ON_HASH(rq));
 
 	list_del_init(&rq->queuelist);
 
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 9b91bb70c5ed..9cbf7b550c78 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -281,6 +281,7 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 {
 	INIT_LIST_HEAD(&rq->queuelist);
 	INIT_LIST_HEAD(&rq->donelist);
+	INIT_HLIST_NODE(&rq->hash);
 
 	rq->errors = 0;
 	rq->rq_status = RQ_ACTIVE;
@@ -2700,6 +2701,7 @@ void __blk_put_request(request_queue_t *q, struct request *req)
 		int priv = req->cmd_flags & REQ_ELVPRIV;
 
 		BUG_ON(!list_empty(&req->queuelist));
+		BUG_ON(!hlist_unhashed(&req->hash));
 
 		blk_free_request(q, req);
 		freed_request(q, rw, priv);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b2a412cf468f..8f5486964671 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -229,6 +229,8 @@ struct request {
 	struct bio *bio;
 	struct bio *biotail;
 
+	struct hlist_node hash;	/* merge hash */
+
 	void *elevator_private;
 	void *completion_data;
 
@@ -696,21 +698,6 @@ static inline void blkdev_dequeue_request(struct request *req)
 	elv_dequeue_request(req->q, req);
 }
 
-/*
- * This should be in elevator.h, but that requires pulling in rq and q
- */
-static inline void elv_dispatch_add_tail(struct request_queue *q,
-					 struct request *rq)
-{
-	if (q->last_merge == rq)
-		q->last_merge = NULL;
-	q->nr_sorted--;
-
-	q->end_sector = rq_end_sector(rq);
-	q->boundary_rq = rq;
-	list_add_tail(&rq->queuelist, &q->queue_head);
-}
-
 /*
  * Access functions for manipulating queue properties
  */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 1713ace808bf..2c270e90b33e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -82,12 +82,14 @@ struct elevator_queue
 	struct kobject kobj;
 	struct elevator_type *elevator_type;
 	struct mutex sysfs_lock;
+	struct hlist_head *hash;
 };
 
 /*
  * block elevator interface
  */
 extern void elv_dispatch_sort(request_queue_t *, struct request *);
+extern void elv_dispatch_add_tail(request_queue_t *, struct request *);
 extern void elv_add_request(request_queue_t *, struct request *, int, int);
 extern void __elv_add_request(request_queue_t *, struct request *, int, int);
 extern void elv_insert(request_queue_t *, struct request *, int);
-- 
cgit v1.2.3


From 10fd48f2376db52f08bf0420d2c4f580e39269e1 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Tue, 11 Jul 2006 21:15:52 +0200
Subject: [PATCH] rbtree: fixed reversed RB_EMPTY_NODE and rb_next/prev

The conditions got reserved. Also make rb_next() and rb_prev() check
for the empty condition.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/as-iosched.c     | 4 ++--
 include/linux/rbtree.h | 2 +-
 lib/rbtree.c           | 6 ++++++
 3 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index 6db494333c3a..9d0f15a54c64 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -336,7 +336,7 @@ static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
 
 static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq)
 {
-	if (!RB_EMPTY_NODE(&arq->rb_node)) {
+	if (RB_EMPTY_NODE(&arq->rb_node)) {
 		WARN_ON(1);
 		return;
 	}
@@ -1039,7 +1039,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 	struct request *rq = arq->request;
 	const int data_dir = arq->is_sync;
 
-	BUG_ON(!RB_EMPTY_NODE(&arq->rb_node));
+	BUG_ON(RB_EMPTY_NODE(&arq->rb_node));
 
 	as_antic_stop(ad);
 	ad->antic_status = ANTIC_OFF;
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 8d5382e62c08..344bc3495ddb 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -133,7 +133,7 @@ static inline void rb_set_color(struct rb_node *rb, int color)
 #define	rb_entry(ptr, type, member) container_of(ptr, type, member)
 
 #define RB_EMPTY_ROOT(root)	((root)->rb_node == NULL)
-#define RB_EMPTY_NODE(node)	(rb_parent(node) != node)
+#define RB_EMPTY_NODE(node)	(rb_parent(node) == node)
 #define RB_CLEAR_NODE(node)	(rb_set_parent(node, node))
 
 extern void rb_insert_color(struct rb_node *, struct rb_root *);
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 1e55ba1c2edf..48499c2d88cc 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -322,6 +322,9 @@ struct rb_node *rb_next(struct rb_node *node)
 {
 	struct rb_node *parent;
 
+	if (rb_parent(node) == node)
+		return NULL;
+
 	/* If we have a right-hand child, go down and then left as far
 	   as we can. */
 	if (node->rb_right) {
@@ -348,6 +351,9 @@ struct rb_node *rb_prev(struct rb_node *node)
 {
 	struct rb_node *parent;
 
+	if (rb_parent(node) == node)
+		return NULL;
+
 	/* If we have a left-hand child, go down and then right as far
 	   as we can. */
 	if (node->rb_left) {
-- 
cgit v1.2.3


From 2e662b65f05d550b6799ed6bfa9963b82279e6b7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 13 Jul 2006 11:55:04 +0200
Subject: [PATCH] elevator: abstract out the rbtree sort handling

The rbtree sort/lookup/reposition logic is mostly duplicated in
cfq/deadline/as, so move it to the elevator core. The io schedulers
still provide the actual rb root, as we don't want to impose any sort
of specific handling on the schedulers.

Introduce the helpers and rb_node in struct request to help migrate the
IO schedulers.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/elevator.c         | 123 +++++++++++++++++++++++++++++++++++++++++------
 block/ll_rw_blk.c        |   7 +--
 include/linux/blkdev.h   |   1 +
 include/linux/elevator.h |  18 ++++++-
 4 files changed, 130 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/block/elevator.c b/block/elevator.c
index cff1102dac9d..cbbc36ba016a 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -239,6 +239,8 @@ int elevator_init(request_queue_t *q, char *name)
 	return ret;
 }
 
+EXPORT_SYMBOL(elevator_init);
+
 void elevator_exit(elevator_t *e)
 {
 	mutex_lock(&e->sysfs_lock);
@@ -250,6 +252,8 @@ void elevator_exit(elevator_t *e)
 	kobject_put(&e->kobj);
 }
 
+EXPORT_SYMBOL(elevator_exit);
+
 static inline void __elv_rqhash_del(struct request *rq)
 {
 	hlist_del_init(&rq->hash);
@@ -297,10 +301,69 @@ static struct request *elv_rqhash_find(request_queue_t *q, sector_t offset)
 	return NULL;
 }
 
+/*
+ * RB-tree support functions for inserting/lookup/removal of requests
+ * in a sorted RB tree.
+ */
+struct request *elv_rb_add(struct rb_root *root, struct request *rq)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct request *__rq;
+
+	while (*p) {
+		parent = *p;
+		__rq = rb_entry(parent, struct request, rb_node);
+
+		if (rq->sector < __rq->sector)
+			p = &(*p)->rb_left;
+		else if (rq->sector > __rq->sector)
+			p = &(*p)->rb_right;
+		else
+			return __rq;
+	}
+
+	rb_link_node(&rq->rb_node, parent, p);
+	rb_insert_color(&rq->rb_node, root);
+	return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_add);
+
+void elv_rb_del(struct rb_root *root, struct request *rq)
+{
+	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
+	rb_erase(&rq->rb_node, root);
+	RB_CLEAR_NODE(&rq->rb_node);
+}
+
+EXPORT_SYMBOL(elv_rb_del);
+
+struct request *elv_rb_find(struct rb_root *root, sector_t sector)
+{
+	struct rb_node *n = root->rb_node;
+	struct request *rq;
+
+	while (n) {
+		rq = rb_entry(n, struct request, rb_node);
+
+		if (sector < rq->sector)
+			n = n->rb_left;
+		else if (sector > rq->sector)
+			n = n->rb_right;
+		else
+			return rq;
+	}
+
+	return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_find);
+
 /*
  * Insert rq into dispatch queue of q.  Queue lock must be held on
- * entry.  If sort != 0, rq is sort-inserted; otherwise, rq will be
- * appended to the dispatch queue.  To be used by specific elevators.
+ * entry.  rq is sort insted into the dispatch queue. To be used by
+ * specific elevators.
  */
 void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 {
@@ -335,8 +398,12 @@ void elv_dispatch_sort(request_queue_t *q, struct request *rq)
 	list_add(&rq->queuelist, entry);
 }
 
+EXPORT_SYMBOL(elv_dispatch_sort);
+
 /*
- * This should be in elevator.h, but that requires pulling in rq and q
+ * Insert rq into dispatch queue of q.  Queue lock must be held on
+ * entry.  rq is added to the back of the dispatch queue. To be used by
+ * specific elevators.
  */
 void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
 {
@@ -352,6 +419,8 @@ void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
 	list_add_tail(&rq->queuelist, &q->queue_head);
 }
 
+EXPORT_SYMBOL(elv_dispatch_add_tail);
+
 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
 	elevator_t *e = q->elevator;
@@ -384,14 +453,15 @@ int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	return ELEVATOR_NO_MERGE;
 }
 
-void elv_merged_request(request_queue_t *q, struct request *rq)
+void elv_merged_request(request_queue_t *q, struct request *rq, int type)
 {
 	elevator_t *e = q->elevator;
 
 	if (e->ops->elevator_merged_fn)
-		e->ops->elevator_merged_fn(q, rq);
+		e->ops->elevator_merged_fn(q, rq, type);
 
-	elv_rqhash_reposition(q, rq);
+	if (type == ELEVATOR_BACK_MERGE)
+		elv_rqhash_reposition(q, rq);
 
 	q->last_merge = rq;
 }
@@ -577,6 +647,8 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 	elv_insert(q, rq, where);
 }
 
+EXPORT_SYMBOL(__elv_add_request);
+
 void elv_add_request(request_queue_t *q, struct request *rq, int where,
 		     int plug)
 {
@@ -587,6 +659,8 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where,
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
+EXPORT_SYMBOL(elv_add_request);
+
 static inline struct request *__elv_next_request(request_queue_t *q)
 {
 	struct request *rq;
@@ -670,6 +744,8 @@ struct request *elv_next_request(request_queue_t *q)
 	return rq;
 }
 
+EXPORT_SYMBOL(elv_next_request);
+
 void elv_dequeue_request(request_queue_t *q, struct request *rq)
 {
 	BUG_ON(list_empty(&rq->queuelist));
@@ -686,6 +762,8 @@ void elv_dequeue_request(request_queue_t *q, struct request *rq)
 		q->in_flight++;
 }
 
+EXPORT_SYMBOL(elv_dequeue_request);
+
 int elv_queue_empty(request_queue_t *q)
 {
 	elevator_t *e = q->elevator;
@@ -699,6 +777,8 @@ int elv_queue_empty(request_queue_t *q)
 	return 1;
 }
 
+EXPORT_SYMBOL(elv_queue_empty);
+
 struct request *elv_latter_request(request_queue_t *q, struct request *rq)
 {
 	elevator_t *e = q->elevator;
@@ -1025,11 +1105,26 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
 	return len;
 }
 
-EXPORT_SYMBOL(elv_dispatch_sort);
-EXPORT_SYMBOL(elv_add_request);
-EXPORT_SYMBOL(__elv_add_request);
-EXPORT_SYMBOL(elv_next_request);
-EXPORT_SYMBOL(elv_dequeue_request);
-EXPORT_SYMBOL(elv_queue_empty);
-EXPORT_SYMBOL(elevator_exit);
-EXPORT_SYMBOL(elevator_init);
+struct request *elv_rb_former_request(request_queue_t *q, struct request *rq)
+{
+	struct rb_node *rbprev = rb_prev(&rq->rb_node);
+
+	if (rbprev)
+		return rb_entry_rq(rbprev);
+
+	return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_former_request);
+
+struct request *elv_rb_latter_request(request_queue_t *q, struct request *rq)
+{
+	struct rb_node *rbnext = rb_next(&rq->rb_node);
+
+	if (rbnext)
+		return rb_entry_rq(rbnext);
+
+	return NULL;
+}
+
+EXPORT_SYMBOL(elv_rb_latter_request);
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 9cbf7b550c78..d388486e98bb 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -281,11 +281,12 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 {
 	INIT_LIST_HEAD(&rq->queuelist);
 	INIT_LIST_HEAD(&rq->donelist);
-	INIT_HLIST_NODE(&rq->hash);
 
 	rq->errors = 0;
 	rq->rq_status = RQ_ACTIVE;
 	rq->bio = rq->biotail = NULL;
+	INIT_HLIST_NODE(&rq->hash);
+	RB_CLEAR_NODE(&rq->rb_node);
 	rq->ioprio = 0;
 	rq->buffer = NULL;
 	rq->ref_count = 1;
@@ -2943,7 +2944,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_back_merge(q, req))
-				elv_merged_request(q, req);
+				elv_merged_request(q, req, el_ret);
 			goto out;
 
 		case ELEVATOR_FRONT_MERGE:
@@ -2970,7 +2971,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 			req->ioprio = ioprio_best(req->ioprio, prio);
 			drive_stat_acct(req, nr_sectors, 0);
 			if (!attempt_front_merge(q, req))
-				elv_merged_request(q, req);
+				elv_merged_request(q, req, el_ret);
 			goto out;
 
 		/* ELV_NO_MERGE: elevator says don't/can't merge. */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8f5486964671..a905c4934a55 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -230,6 +230,7 @@ struct request {
 	struct bio *biotail;
 
 	struct hlist_node hash;	/* merge hash */
+	struct rb_node rb_node;	/* sort/lookup */
 
 	void *elevator_private;
 	void *completion_data;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 2c270e90b33e..95b2a04b969c 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -6,7 +6,7 @@ typedef int (elevator_merge_fn) (request_queue_t *, struct request **,
 
 typedef void (elevator_merge_req_fn) (request_queue_t *, struct request *, struct request *);
 
-typedef void (elevator_merged_fn) (request_queue_t *, struct request *);
+typedef void (elevator_merged_fn) (request_queue_t *, struct request *, int);
 
 typedef int (elevator_dispatch_fn) (request_queue_t *, int);
 
@@ -96,7 +96,7 @@ extern void elv_insert(request_queue_t *, struct request *, int);
 extern int elv_merge(request_queue_t *, struct request **, struct bio *);
 extern void elv_merge_requests(request_queue_t *, struct request *,
 			       struct request *);
-extern void elv_merged_request(request_queue_t *, struct request *);
+extern void elv_merged_request(request_queue_t *, struct request *, int);
 extern void elv_dequeue_request(request_queue_t *, struct request *);
 extern void elv_requeue_request(request_queue_t *, struct request *);
 extern int elv_queue_empty(request_queue_t *);
@@ -126,6 +126,19 @@ extern int elevator_init(request_queue_t *, char *);
 extern void elevator_exit(elevator_t *);
 extern int elv_rq_merge_ok(struct request *, struct bio *);
 
+/*
+ * Helper functions.
+ */
+extern struct request *elv_rb_former_request(request_queue_t *, struct request *);
+extern struct request *elv_rb_latter_request(request_queue_t *, struct request *);
+
+/*
+ * rb support functions.
+ */
+extern struct request *elv_rb_add(struct rb_root *, struct request *);
+extern void elv_rb_del(struct rb_root *, struct request *);
+extern struct request *elv_rb_find(struct rb_root *, sector_t);
+
 /*
  * Return values from elevator merger
  */
@@ -151,5 +164,6 @@ enum {
 };
 
 #define rq_end_sector(rq)	((rq)->sector + (rq)->nr_sectors)
+#define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)
 
 #endif
-- 
cgit v1.2.3


From 1fbfdfcddff4df188b24d9d05271a76a85064583 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Tue, 11 Jul 2006 21:49:15 +0200
Subject: [PATCH] elevator: introduce a way to reuse rq for internal FIFO
 handling

The io schedulers can use this instead of having to allocate space for
it themselves.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 include/linux/elevator.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 95b2a04b969c..0e7b1a733919 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -166,4 +166,16 @@ enum {
 #define rq_end_sector(rq)	((rq)->sector + (rq)->nr_sectors)
 #define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)
 
+/*
+ * Hack to reuse the donelist list_head as the fifo time holder while
+ * the request is in the io scheduler. Saves an unsigned long in rq.
+ */
+#define rq_fifo_time(rq)	((unsigned long) (rq)->donelist.next)
+#define rq_set_fifo_time(rq,exp)	((rq)->donelist.next = (void *) (exp))
+#define rq_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
+#define rq_fifo_clear(rq)	do {		\
+	list_del_init(&(rq)->queuelist);	\
+	INIT_LIST_HEAD(&(rq)->donelist);	\
+	} while (0)
+
 #endif
-- 
cgit v1.2.3


From 9e2585a8a23f3a42f815b2a638725d85a921cd65 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Fri, 28 Jul 2006 09:26:13 +0200
Subject: [PATCH] as-iosched: remove arq->is_sync member

We can track this in struct request.

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
---
 block/as-iosched.c     | 36 ++++++++++++++----------------------
 include/linux/blkdev.h |  5 +++++
 2 files changed, 19 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index c2665467950e..dca0b0563ca0 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -151,7 +151,6 @@ struct as_rq {
 
 	struct io_context *io_context;	/* The submitting task */
 
-	unsigned int is_sync;
 	enum arq_state state;
 };
 
@@ -241,7 +240,7 @@ static void as_put_io_context(struct as_rq *arq)
 
 	aic = arq->io_context->aic;
 
-	if (arq->is_sync == REQ_SYNC && aic) {
+	if (rq_is_sync(arq->request) && aic) {
 		spin_lock(&aic->lock);
 		set_bit(AS_TASK_IORUNNING, &aic->state);
 		aic->last_end_request = jiffies;
@@ -254,14 +253,13 @@ static void as_put_io_context(struct as_rq *arq)
 /*
  * rb tree support functions
  */
-#define ARQ_RB_ROOT(ad, arq)	(&(ad)->sort_list[(arq)->is_sync])
+#define RQ_RB_ROOT(ad, rq)	(&(ad)->sort_list[rq_is_sync((rq))])
 
 static void as_add_arq_rb(struct as_data *ad, struct request *rq)
 {
-	struct as_rq *arq = RQ_DATA(rq);
 	struct request *alias;
 
-	while ((unlikely(alias = elv_rb_add(ARQ_RB_ROOT(ad, arq), rq)))) {
+	while ((unlikely(alias = elv_rb_add(RQ_RB_ROOT(ad, rq), rq)))) {
 		as_move_to_dispatch(ad, RQ_DATA(alias));
 		as_antic_stop(ad);
 	}
@@ -269,7 +267,7 @@ static void as_add_arq_rb(struct as_data *ad, struct request *rq)
 
 static inline void as_del_arq_rb(struct as_data *ad, struct request *rq)
 {
-	elv_rb_del(ARQ_RB_ROOT(ad, RQ_DATA(rq)), rq);
+	elv_rb_del(RQ_RB_ROOT(ad, rq), rq);
 }
 
 /*
@@ -300,13 +298,13 @@ as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2)
 	if (arq2 == NULL)
 		return arq1;
 
-	data_dir = arq1->is_sync;
+	data_dir = rq_is_sync(arq1->request);
 
 	last = ad->last_sector[data_dir];
 	s1 = arq1->request->sector;
 	s2 = arq2->request->sector;
 
-	BUG_ON(data_dir != arq2->is_sync);
+	BUG_ON(data_dir != rq_is_sync(arq2->request));
 
 	/*
 	 * Strict one way elevator _except_ in the case where we allow
@@ -377,7 +375,7 @@ static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *arq)
 	if (rbnext)
 		next = RQ_DATA(rb_entry_rq(rbnext));
 	else {
-		const int data_dir = arq->is_sync;
+		const int data_dir = rq_is_sync(last);
 
 		rbnext = rb_first(&ad->sort_list[data_dir]);
 		if (rbnext && rbnext != &last->rb_node)
@@ -538,8 +536,7 @@ static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic,
 static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
 				struct request *rq)
 {
-	struct as_rq *arq = RQ_DATA(rq);
-	int data_dir = arq->is_sync;
+	int data_dir = rq_is_sync(rq);
 	unsigned long thinktime = 0;
 	sector_t seek_dist;
 
@@ -674,7 +671,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
 		return 1;
 	}
 
-	if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) {
+	if (arq && rq_is_sync(arq->request) && as_close_req(ad, aic, arq)) {
 		/*
 		 * Found a close request that is not one of ours.
 		 *
@@ -758,7 +755,7 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
  */
 static void as_update_arq(struct as_data *ad, struct as_rq *arq)
 {
-	const int data_dir = arq->is_sync;
+	const int data_dir = rq_is_sync(arq->request);
 
 	/* keep the next_arq cache up to date */
 	ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]);
@@ -835,7 +832,7 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
 	 * actually serviced. This should help devices with big TCQ windows
 	 * and writeback caches
 	 */
-	if (ad->new_batch && ad->batch_data_dir == arq->is_sync) {
+	if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) {
 		update_write_batch(ad);
 		ad->current_batch_expires = jiffies +
 				ad->batch_expire[REQ_SYNC];
@@ -868,7 +865,7 @@ out:
 static void as_remove_queued_request(request_queue_t *q, struct request *rq)
 {
 	struct as_rq *arq = RQ_DATA(rq);
-	const int data_dir = arq->is_sync;
+	const int data_dir = rq_is_sync(rq);
 	struct as_data *ad = q->elevator->elevator_data;
 
 	WARN_ON(arq->state != AS_RQ_QUEUED);
@@ -941,7 +938,7 @@ static inline int as_batch_expired(struct as_data *ad)
 static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 {
 	struct request *rq = arq->request;
-	const int data_dir = arq->is_sync;
+	const int data_dir = rq_is_sync(rq);
 
 	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
 
@@ -1158,12 +1155,7 @@ static void as_add_request(request_queue_t *q, struct request *rq)
 
 	arq->state = AS_RQ_NEW;
 
-	if (rq_data_dir(arq->request) == READ
-			|| (arq->request->cmd_flags & REQ_RW_SYNC))
-		arq->is_sync = 1;
-	else
-		arq->is_sync = 0;
-	data_dir = arq->is_sync;
+	data_dir = rq_is_sync(rq);
 
 	arq->io_context = as_get_io_context();
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a905c4934a55..55ef6efe3eb5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -531,6 +531,11 @@ enum {
 
 #define rq_data_dir(rq)		((rq)->cmd_flags & 1)
 
+/*
+ * We regard a request as sync, if it's a READ or a SYNC write.
+ */
+#define rq_is_sync(rq)		(rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC)
+
 static inline int blk_queue_full(struct request_queue *q, int rw)
 {
 	if (rw == READ)
-- 
cgit v1.2.3


From ff7d145fd911266ae42af7552edc32681c01addb Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Wed, 12 Jul 2006 14:04:37 +0200
Subject: [PATCH] Add one more pointer to struct request for IO scheduler usage

Then we have enough room in the request to get rid of the dynamic
allocations in CFQ/AS.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 include/linux/blkdev.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 55ef6efe3eb5..d2dc17151f6c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -232,7 +232,13 @@ struct request {
 	struct hlist_node hash;	/* merge hash */
 	struct rb_node rb_node;	/* sort/lookup */
 
+	/*
+	 * two pointers are available for the IO schedulers, if they need
+	 * more they have to dynamically allocate it.
+	 */
 	void *elevator_private;
+	void *elevator_private2;
+
 	void *completion_data;
 
 	int rq_status;	/* should split this into a few status bits */
-- 
cgit v1.2.3


From c00895ab2f08df7044e58ee01c38bf0a661ea0eb Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 30 Sep 2006 20:29:12 +0200
Subject: [PATCH] Remove ->waiting member from struct request

As the comments indicates in blkdev.h, we can fold it into ->end_io_data
usage as that is really what ->waiting is. Fixup the users of
blk_end_sync_rq().

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/elevator.c          |  3 +--
 block/ll_rw_blk.c         | 13 ++++++-------
 drivers/block/DAC960.c    |  2 +-
 drivers/block/paride/pd.c |  3 +--
 drivers/block/pktcdvd.c   |  2 +-
 drivers/ide/ide-io.c      | 13 ++++++-------
 drivers/ide/ide-tape.c    |  2 +-
 drivers/ide/ide.c         |  4 ++--
 include/linux/blkdev.h    |  3 +--
 9 files changed, 20 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/block/elevator.c b/block/elevator.c
index cbbc36ba016a..924b81b08f86 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -67,8 +67,7 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 	/*
 	 * same device and no special stuff set, merge is ok
 	 */
-	if (rq->rq_disk == bio->bi_bdev->bd_disk &&
-	    !rq->waiting && !rq->special)
+	if (rq->rq_disk == bio->bi_bdev->bd_disk && !rq->special)
 		return 1;
 
 	return 0;
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index d388486e98bb..3b6aad2affd8 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -291,7 +291,6 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 	rq->buffer = NULL;
 	rq->ref_count = 1;
 	rq->q = q;
-	rq->waiting = NULL;
 	rq->special = NULL;
 	rq->data_len = 0;
 	rq->data = NULL;
@@ -451,6 +450,7 @@ static void queue_flush(request_queue_t *q, unsigned which)
 	rq->cmd_flags = REQ_HARDBARRIER;
 	rq_init(q, rq);
 	rq->elevator_private = NULL;
+	rq->elevator_private2 = NULL;
 	rq->rq_disk = q->bar_rq.rq_disk;
 	rq->rl = NULL;
 	rq->end_io = end_io;
@@ -479,6 +479,7 @@ static inline struct request *start_ordered(request_queue_t *q,
 		rq->cmd_flags |= REQ_RW;
 	rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
 	rq->elevator_private = NULL;
+	rq->elevator_private2 = NULL;
 	rq->rl = NULL;
 	init_request_from_bio(rq, q->orig_bar_rq->bio);
 	rq->end_io = bar_end_io;
@@ -2569,10 +2570,9 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
 		rq->sense_len = 0;
 	}
 
-	rq->waiting = &wait;
+	rq->end_io_data = &wait;
 	blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
 	wait_for_completion(&wait);
-	rq->waiting = NULL;
 
 	if (rq->errors)
 		err = -EIO;
@@ -2736,9 +2736,9 @@ EXPORT_SYMBOL(blk_put_request);
  */
 void blk_end_sync_rq(struct request *rq, int error)
 {
-	struct completion *waiting = rq->waiting;
+	struct completion *waiting = rq->end_io_data;
 
-	rq->waiting = NULL;
+	rq->end_io_data = NULL;
 	__blk_put_request(rq->q, rq);
 
 	/*
@@ -2801,7 +2801,7 @@ static int attempt_merge(request_queue_t *q, struct request *req,
 
 	if (rq_data_dir(req) != rq_data_dir(next)
 	    || req->rq_disk != next->rq_disk
-	    || next->waiting || next->special)
+	    || next->special)
 		return 0;
 
 	/*
@@ -2886,7 +2886,6 @@ static void init_request_from_bio(struct request *req, struct bio *bio)
 	req->nr_phys_segments = bio_phys_segments(req->q, bio);
 	req->nr_hw_segments = bio_hw_segments(req->q, bio);
 	req->buffer = bio_data(bio);	/* see ->buffer comment above */
-	req->waiting = NULL;
 	req->bio = req->biotail = bio;
 	req->ioprio = bio_prio(bio);
 	req->rq_disk = bio->bi_bdev->bd_disk;
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index a360215dbce7..2568640430fb 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3331,7 +3331,7 @@ static int DAC960_process_queue(DAC960_Controller_T *Controller, struct request_
 		Command->DmaDirection = PCI_DMA_TODEVICE;
 		Command->CommandType = DAC960_WriteCommand;
 	}
-	Command->Completion = Request->waiting;
+	Command->Completion = Request->end_io_data;
 	Command->LogicalDriveNumber = (long)Request->rq_disk->private_data;
 	Command->BlockNumber = Request->sector;
 	Command->BlockCount = Request->nr_sectors;
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 12ff1a274d91..500d2ebb41e4 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -722,11 +722,10 @@ static int pd_special_command(struct pd_unit *disk,
 	rq.rq_status = RQ_ACTIVE;
 	rq.rq_disk = disk->gd;
 	rq.ref_count = 1;
-	rq.waiting = &wait;
+	rq.end_io_data = &wait;
 	rq.end_io = blk_end_sync_rq;
 	blk_insert_request(disk->gd->queue, &rq, 0, func);
 	wait_for_completion(&wait);
-	rq.waiting = NULL;
 	if (rq.errors)
 		err = -EIO;
 	blk_put_request(&rq);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 42891d2b054e..888d1aceeeff 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -375,7 +375,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 	rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
 
 	rq->ref_count++;
-	rq->waiting = &wait;
+	rq->end_io_data = &wait;
 	rq->end_io = blk_end_sync_rq;
 	elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
 	generic_unplug_device(q);
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 3436b1f104eb..a3ffb04436bd 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -141,7 +141,7 @@ enum {
 
 static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 stat, u8 error)
 {
-	struct request_pm_state *pm = rq->end_io_data;
+	struct request_pm_state *pm = rq->data;
 
 	if (drive->media != ide_disk)
 		return;
@@ -164,7 +164,7 @@ static void ide_complete_power_step(ide_drive_t *drive, struct request *rq, u8 s
 
 static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->end_io_data;
+	struct request_pm_state *pm = rq->data;
 	ide_task_t *args = rq->special;
 
 	memset(args, 0, sizeof(*args));
@@ -421,7 +421,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 			}
 		}
 	} else if (blk_pm_request(rq)) {
-		struct request_pm_state *pm = rq->end_io_data;
+		struct request_pm_state *pm = rq->data;
 #ifdef DEBUG_PM
 		printk("%s: complete_power_step(step: %d, stat: %x, err: %x)\n",
 			drive->name, rq->pm->pm_step, stat, err);
@@ -933,7 +933,7 @@ done:
 
 static void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
 {
-	struct request_pm_state *pm = rq->end_io_data;
+	struct request_pm_state *pm = rq->data;
 
 	if (blk_pm_suspend_request(rq) &&
 	    pm->pm_step == ide_pm_state_start_suspend)
@@ -1018,7 +1018,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 		    rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
 			return execute_drive_cmd(drive, rq);
 		else if (blk_pm_request(rq)) {
-			struct request_pm_state *pm = rq->end_io_data;
+			struct request_pm_state *pm = rq->data;
 #ifdef DEBUG_PM
 			printk("%s: start_power_step(step: %d)\n",
 				drive->name, rq->pm->pm_step);
@@ -1718,7 +1718,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
 	 */
 	if (must_wait) {
 		rq->ref_count++;
-		rq->waiting = &wait;
+		rq->end_io_data = &wait;
 		rq->end_io = blk_end_sync_rq;
 	}
 
@@ -1736,7 +1736,6 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
 	err = 0;
 	if (must_wait) {
 		wait_for_completion(&wait);
-		rq->waiting = NULL;
 		if (rq->errors)
 			err = -EIO;
 
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 643e4b9ac651..66f9678d2f10 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -2773,7 +2773,7 @@ static void idetape_wait_for_request (ide_drive_t *drive, struct request *rq)
 		return;
 	}
 #endif /* IDETAPE_DEBUG_BUGS */
-	rq->waiting = &wait;
+	rq->end_io_data = &wait;
 	rq->end_io = blk_end_sync_rq;
 	spin_unlock_irq(&tape->spinlock);
 	wait_for_completion(&wait);
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 9384a3fdde6c..2b1a1389c318 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -1219,7 +1219,7 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg)
 	memset(&args, 0, sizeof(args));
 	rq.cmd_type = REQ_TYPE_PM_SUSPEND;
 	rq.special = &args;
-	rq.end_io_data = &rqpm;
+	rq.data = &rqpm;
 	rqpm.pm_step = ide_pm_state_start_suspend;
 	if (mesg.event == PM_EVENT_PRETHAW)
 		mesg.event = PM_EVENT_FREEZE;
@@ -1240,7 +1240,7 @@ static int generic_ide_resume(struct device *dev)
 	memset(&args, 0, sizeof(args));
 	rq.cmd_type = REQ_TYPE_PM_RESUME;
 	rq.special = &args;
-	rq.end_io_data = &rqpm;
+	rq.data = &rqpm;
 	rqpm.pm_step = ide_pm_state_start_resume;
 	rqpm.pm_state = PM_EVENT_ON;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d2dc17151f6c..604f23189097 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -266,7 +266,6 @@ struct request {
 	request_queue_t *q;
 	struct request_list *rl;
 
-	struct completion *waiting;
 	void *special;
 	char *buffer;
 
@@ -285,7 +284,7 @@ struct request {
 	int retries;
 
 	/*
-	 * completion callback. end_io_data should be folded in with waiting
+	 * completion callback.
 	 */
 	rq_end_io_fn *end_io;
 	void *end_io_data;
-- 
cgit v1.2.3


From 49171e5c6f414d49a061b5c1c84967c2eb569822 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 10 Aug 2006 08:59:11 +0200
Subject: [PATCH] Remove struct request_list from struct request

It is always identical to &q->rq, and we only use it for detecting
whether this request came out of our mempool or not. So replace it
with an additional ->flags bit flag.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/ll_rw_blk.c      | 10 ++--------
 include/linux/blkdev.h |  3 ++-
 2 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 3b6aad2affd8..f7462502bfdf 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -452,7 +452,6 @@ static void queue_flush(request_queue_t *q, unsigned which)
 	rq->elevator_private = NULL;
 	rq->elevator_private2 = NULL;
 	rq->rq_disk = q->bar_rq.rq_disk;
-	rq->rl = NULL;
 	rq->end_io = end_io;
 	q->prepare_flush_fn(q, rq);
 
@@ -480,7 +479,6 @@ static inline struct request *start_ordered(request_queue_t *q,
 	rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
 	rq->elevator_private = NULL;
 	rq->elevator_private2 = NULL;
-	rq->rl = NULL;
 	init_request_from_bio(rq, q->orig_bar_rq->bio);
 	rq->end_io = bar_end_io;
 
@@ -2018,7 +2016,7 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
 	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
 	 * see bio.h and blkdev.h
 	 */
-	rq->cmd_flags = rw;
+	rq->cmd_flags = rw | REQ_ALLOCED;
 
 	if (priv) {
 		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
@@ -2196,7 +2194,6 @@ rq_starved:
 		ioc->nr_batch_requests--;
 	
 	rq_init(q, rq);
-	rq->rl = rl;
 
 	blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
 out:
@@ -2681,8 +2678,6 @@ EXPORT_SYMBOL_GPL(disk_round_stats);
  */
 void __blk_put_request(request_queue_t *q, struct request *req)
 {
-	struct request_list *rl = req->rl;
-
 	if (unlikely(!q))
 		return;
 	if (unlikely(--req->ref_count))
@@ -2691,13 +2686,12 @@ void __blk_put_request(request_queue_t *q, struct request *req)
 	elv_completed_request(q, req);
 
 	req->rq_status = RQ_INACTIVE;
-	req->rl = NULL;
 
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
 	 * it didn't come out of our reserved rq pools
 	 */
-	if (rl) {
+	if (req->cmd_flags & REQ_ALLOCED) {
 		int rw = rq_data_dir(req);
 		int priv = req->cmd_flags & REQ_ELVPRIV;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 604f23189097..d4c1dd046e27 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -180,6 +180,7 @@ enum rq_flag_bits {
 	__REQ_PREEMPT,		/* set for "ide_preempt" requests */
 	__REQ_ORDERED_COLOR,	/* is before or after barrier */
 	__REQ_RW_SYNC,		/* request is sync (O_DIRECT) */
+	__REQ_ALLOCED,		/* request came from our alloc pool */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -199,6 +200,7 @@ enum rq_flag_bits {
 #define REQ_PREEMPT	(1 << __REQ_PREEMPT)
 #define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
 #define REQ_RW_SYNC	(1 << __REQ_RW_SYNC)
+#define REQ_ALLOCED	(1 << __REQ_ALLOCED)
 
 #define BLK_MAX_CDB	16
 
@@ -264,7 +266,6 @@ struct request {
 
 	int ref_count;
 	request_queue_t *q;
-	struct request_list *rl;
 
 	void *special;
 	char *buffer;
-- 
cgit v1.2.3


From cdd6026217c0e4cda2efce1bdc318661bef1f66f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Fri, 28 Jul 2006 09:32:07 +0200
Subject: [PATCH] Remove ->rq_status from struct request

After Christophs SCSI change, the only usage left is RQ_ACTIVE
and RQ_INACTIVE. The block layer sets RQ_INACTIVE right before freeing
the request, so any check for RQ_INACTIVE in a driver is a bug and
indicates use-after-free.

So kill/clean the remaining users, straight forward.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 arch/um/drivers/ubd_kern.c |  2 --
 block/ll_rw_blk.c          |  3 ---
 drivers/block/paride/pd.c  |  1 -
 drivers/block/swim3.c      |  4 ++--
 drivers/block/swim_iop.c   |  4 ++--
 drivers/fc4/fc.c           |  1 -
 drivers/ide/ide-floppy.c   |  3 +--
 drivers/ide/ide-io.c       |  1 -
 drivers/ide/ide-tape.c     |  4 ++--
 drivers/scsi/ide-scsi.c    |  2 +-
 drivers/scsi/scsi.c        |  2 +-
 include/linux/blkdev.h     | 13 +++++--------
 12 files changed, 14 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 5fa4c8e258a4..fda4a3940698 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -981,8 +981,6 @@ static int prepare_request(struct request *req, struct io_thread_req *io_req)
 	__u64 offset;
 	int len;
 
-	if(req->rq_status == RQ_INACTIVE) return(1);
-
 	/* This should be impossible now */
 	if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
 		printk("Write attempted on readonly ubd device %s\n",
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index f7462502bfdf..b94a396aa624 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -283,7 +283,6 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 	INIT_LIST_HEAD(&rq->donelist);
 
 	rq->errors = 0;
-	rq->rq_status = RQ_ACTIVE;
 	rq->bio = rq->biotail = NULL;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
@@ -2685,8 +2684,6 @@ void __blk_put_request(request_queue_t *q, struct request *req)
 
 	elv_completed_request(q, req);
 
-	req->rq_status = RQ_INACTIVE;
-
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
 	 * it didn't come out of our reserved rq pools
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 500d2ebb41e4..38578b9dbfd1 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -719,7 +719,6 @@ static int pd_special_command(struct pd_unit *disk,
 
 	memset(&rq, 0, sizeof(rq));
 	rq.errors = 0;
-	rq.rq_status = RQ_ACTIVE;
 	rq.rq_disk = disk->gd;
 	rq.ref_count = 1;
 	rq.end_io_data = &wait;
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index cc42e762396f..f2305ee792a1 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -319,8 +319,8 @@ static void start_request(struct floppy_state *fs)
 		printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
 		       req->rq_disk->disk_name, req->cmd,
 		       (long)req->sector, req->nr_sectors, req->buffer);
-		printk("           rq_status=%d errors=%d current_nr_sectors=%ld\n",
-		       req->rq_status, req->errors, req->current_nr_sectors);
+		printk("           errors=%d current_nr_sectors=%ld\n",
+		       req->errors, req->current_nr_sectors);
 #endif
 
 		if (req->sector < 0 || req->sector >= fs->total_secs) {
diff --git a/drivers/block/swim_iop.c b/drivers/block/swim_iop.c
index 89e3c2f8b776..dfda796eba56 100644
--- a/drivers/block/swim_iop.c
+++ b/drivers/block/swim_iop.c
@@ -529,8 +529,8 @@ static void start_request(struct floppy_state *fs)
 		printk("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%ld buf=%p\n",
 		       CURRENT->rq_disk->disk_name, CURRENT->cmd,
 		       CURRENT->sector, CURRENT->nr_sectors, CURRENT->buffer);
-		printk("           rq_status=%d errors=%d current_nr_sectors=%ld\n",
-		       CURRENT->rq_status, CURRENT->errors, CURRENT->current_nr_sectors);
+		printk("           errors=%d current_nr_sectors=%ld\n",
+		      CURRENT->errors, CURRENT->current_nr_sectors);
 #endif
 
 		if (CURRENT->sector < 0 || CURRENT->sector >= fs->total_secs) {
diff --git a/drivers/fc4/fc.c b/drivers/fc4/fc.c
index 1a159e8843ca..22d17474755f 100644
--- a/drivers/fc4/fc.c
+++ b/drivers/fc4/fc.c
@@ -974,7 +974,6 @@ int fcp_scsi_dev_reset(Scsi_Cmnd *SCpnt)
 	 */
 
 	fc->rst_pkt->device->host->eh_action = &sem;
-	fc->rst_pkt->request->rq_status = RQ_SCSI_BUSY;
 
 	fc->rst_pkt->done = fcp_scsi_reset_done;
 
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 0edc32204915..8ccee9c769f8 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -1281,8 +1281,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
 	idefloppy_pc_t *pc;
 	unsigned long block = (unsigned long)block_s;
 
-	debug_log(KERN_INFO "rq_status: %d, dev: %s, flags: %lx, errors: %d\n",
-			rq->rq_status,
+	debug_log(KERN_INFO "dev: %s, flags: %lx, errors: %d\n",
 			rq->rq_disk ? rq->rq_disk->disk_name : "?",
 			rq->flags, rq->errors);
 	debug_log(KERN_INFO "sector: %ld, nr_sectors: %ld, "
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index a3ffb04436bd..38479a29d3e1 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -1710,7 +1710,6 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
 	int must_wait = (action == ide_wait || action == ide_head_wait);
 
 	rq->errors = 0;
-	rq->rq_status = RQ_ACTIVE;
 
 	/*
 	 * we need to hold an extra reference to request for safe inspection
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 66f9678d2f10..2ebc3760f261 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -2423,8 +2423,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
 #if IDETAPE_DEBUG_LOG
 #if 0
 	if (tape->debug_level >= 5)
-		printk(KERN_INFO "ide-tape: rq_status: %d, "
-			"dev: %s, cmd: %ld, errors: %d\n", rq->rq_status,
+		printk(KERN_INFO "ide-tape:  %d, "
+			"dev: %s, cmd: %ld, errors: %d\n",
 			 rq->rq_disk->disk_name, rq->cmd[0], rq->errors);
 #endif
 	if (tape->debug_level >= 2)
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 65b19695ebe2..1427a41e8441 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -708,7 +708,7 @@ static ide_startstop_t idescsi_issue_pc (ide_drive_t *drive, idescsi_pc_t *pc)
 static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block)
 {
 #if IDESCSI_DEBUG_LOG
-	printk (KERN_INFO "rq_status: %d, dev: %s, cmd: %x, errors: %d\n",rq->rq_status, rq->rq_disk->disk_name,rq->cmd[0],rq->errors);
+	printk (KERN_INFO "dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name,rq->cmd[0],rq->errors);
 	printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors);
 #endif /* IDESCSI_DEBUG_LOG */
 
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 7a054f9d1ee3..12f6639dda2d 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -1065,7 +1065,7 @@ int scsi_device_cancel(struct scsi_device *sdev, int recovery)
 
 	spin_lock_irqsave(&sdev->list_lock, flags);
 	list_for_each_entry(scmd, &sdev->cmd_list, list) {
-		if (scmd->request && scmd->request->rq_status != RQ_INACTIVE) {
+		if (scmd->request) {
 			/*
 			 * If we are unable to remove the timer, it means
 			 * that the command has already timed out or
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d4c1dd046e27..8a3e309e0842 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -243,8 +243,6 @@ struct request {
 
 	void *completion_data;
 
-	int rq_status;	/* should split this into a few status bits */
-	int errors;
 	struct gendisk *rq_disk;
 	unsigned long start_time;
 
@@ -262,14 +260,16 @@ struct request {
 
 	unsigned short ioprio;
 
-	int tag;
-
-	int ref_count;
 	request_queue_t *q;
 
 	void *special;
 	char *buffer;
 
+	int tag;
+	int errors;
+
+	int ref_count;
+
 	/*
 	 * when request is used as a packet command carrier
 	 */
@@ -456,9 +456,6 @@ struct request_queue
 	struct mutex		sysfs_lock;
 };
 
-#define RQ_INACTIVE		(-1)
-#define RQ_ACTIVE		1
-
 #define QUEUE_FLAG_CLUSTER	0	/* cluster several segments into 1 */
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
 #define QUEUE_FLAG_STOPPED	2	/* queue is stopped */
-- 
cgit v1.2.3


From cb78b285c8f9d59b0d4e4f6a54c2977ce1d9b880 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Fri, 28 Jul 2006 09:32:57 +0200
Subject: [PATCH] Drop useless bio passing in may_queue/set_request API

It's not needed for anything, so kill the bio passing.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/as-iosched.c       | 2 +-
 block/cfq-iosched.c      | 5 ++---
 block/elevator.c         | 9 ++++-----
 block/ll_rw_blk.c        | 9 ++++-----
 include/linux/elevator.h | 8 ++++----
 5 files changed, 15 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index 02eb9333898f..66015bc79e6f 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1284,7 +1284,7 @@ static void as_work_handler(void *data)
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static int as_may_queue(request_queue_t *q, int rw, struct bio *bio)
+static int as_may_queue(request_queue_t *q, int rw)
 {
 	int ret = ELV_MQUEUE_MAY;
 	struct as_data *ad = q->elevator->elevator_data;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 3c5fd9c2c205..2ac35aacbbf9 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1752,7 +1752,7 @@ __cfq_may_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	return ELV_MQUEUE_MAY;
 }
 
-static int cfq_may_queue(request_queue_t *q, int rw, struct bio *bio)
+static int cfq_may_queue(request_queue_t *q, int rw)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct task_struct *tsk = current;
@@ -1817,8 +1817,7 @@ static void cfq_put_request(request_queue_t *q, struct request *rq)
  * Allocate cfq data structures associated with this request.
  */
 static int
-cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		gfp_t gfp_mask)
+cfq_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct task_struct *tsk = current;
diff --git a/block/elevator.c b/block/elevator.c
index 924b81b08f86..788d2d81994c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -796,13 +796,12 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
 	return NULL;
 }
 
-int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		    gfp_t gfp_mask)
+int elv_set_request(request_queue_t *q, struct request *rq, gfp_t gfp_mask)
 {
 	elevator_t *e = q->elevator;
 
 	if (e->ops->elevator_set_req_fn)
-		return e->ops->elevator_set_req_fn(q, rq, bio, gfp_mask);
+		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
 
 	rq->elevator_private = NULL;
 	return 0;
@@ -816,12 +815,12 @@ void elv_put_request(request_queue_t *q, struct request *rq)
 		e->ops->elevator_put_req_fn(q, rq);
 }
 
-int elv_may_queue(request_queue_t *q, int rw, struct bio *bio)
+int elv_may_queue(request_queue_t *q, int rw)
 {
 	elevator_t *e = q->elevator;
 
 	if (e->ops->elevator_may_queue_fn)
-		return e->ops->elevator_may_queue_fn(q, rw, bio);
+		return e->ops->elevator_may_queue_fn(q, rw);
 
 	return ELV_MQUEUE_MAY;
 }
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index b94a396aa624..b1ea941f6dc3 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2003,8 +2003,7 @@ static inline void blk_free_request(request_queue_t *q, struct request *rq)
 }
 
 static inline struct request *
-blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
-		  int priv, gfp_t gfp_mask)
+blk_alloc_request(request_queue_t *q, int rw, int priv, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -2018,7 +2017,7 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
 	rq->cmd_flags = rw | REQ_ALLOCED;
 
 	if (priv) {
-		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
+		if (unlikely(elv_set_request(q, rq, gfp_mask))) {
 			mempool_free(rq, q->rq.rq_pool);
 			return NULL;
 		}
@@ -2109,7 +2108,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
 	struct io_context *ioc = NULL;
 	int may_queue, priv;
 
-	may_queue = elv_may_queue(q, rw, bio);
+	may_queue = elv_may_queue(q, rw);
 	if (may_queue == ELV_MQUEUE_NO)
 		goto rq_starved;
 
@@ -2157,7 +2156,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
 
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
+	rq = blk_alloc_request(q, rw, priv, gfp_mask);
 	if (unlikely(!rq)) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 0e7b1a733919..cc81645a3e18 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -14,9 +14,9 @@ typedef void (elevator_add_req_fn) (request_queue_t *, struct request *);
 typedef int (elevator_queue_empty_fn) (request_queue_t *);
 typedef struct request *(elevator_request_list_fn) (request_queue_t *, struct request *);
 typedef void (elevator_completed_req_fn) (request_queue_t *, struct request *);
-typedef int (elevator_may_queue_fn) (request_queue_t *, int, struct bio *);
+typedef int (elevator_may_queue_fn) (request_queue_t *, int);
 
-typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, struct bio *, gfp_t);
+typedef int (elevator_set_req_fn) (request_queue_t *, struct request *, gfp_t);
 typedef void (elevator_put_req_fn) (request_queue_t *, struct request *);
 typedef void (elevator_activate_req_fn) (request_queue_t *, struct request *);
 typedef void (elevator_deactivate_req_fn) (request_queue_t *, struct request *);
@@ -105,9 +105,9 @@ extern struct request *elv_former_request(request_queue_t *, struct request *);
 extern struct request *elv_latter_request(request_queue_t *, struct request *);
 extern int elv_register_queue(request_queue_t *q);
 extern void elv_unregister_queue(request_queue_t *q);
-extern int elv_may_queue(request_queue_t *, int, struct bio *);
+extern int elv_may_queue(request_queue_t *, int);
 extern void elv_completed_request(request_queue_t *, struct request *);
-extern int elv_set_request(request_queue_t *, struct request *, struct bio *, gfp_t);
+extern int elv_set_request(request_queue_t *, struct request *, gfp_t);
 extern void elv_put_request(request_queue_t *, struct request *);
 
 /*
-- 
cgit v1.2.3


From e6a1c874a064e7d07f24986aba7cd537b7f4a25d Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 10 Aug 2006 09:00:21 +0200
Subject: [PATCH] struct request: shrink and optimize some more

Move some members around and unionize completion_data and rb_node since
they cannot ever be used at the same time.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 include/linux/blkdev.h | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8a3e309e0842..a1e288069e2e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -211,6 +211,8 @@ struct request {
 	struct list_head queuelist;
 	struct list_head donelist;
 
+	request_queue_t *q;
+
 	unsigned int cmd_flags;
 	enum rq_cmd_type_bits cmd_type;
 
@@ -219,12 +221,12 @@ struct request {
 	 */
 
 	sector_t sector;		/* next sector to submit */
+	sector_t hard_sector;		/* next sector to complete */
 	unsigned long nr_sectors;	/* no. of sectors left to submit */
+	unsigned long hard_nr_sectors;	/* no. of sectors left to complete */
 	/* no. of sectors left to submit in the current segment */
 	unsigned int current_nr_sectors;
 
-	sector_t hard_sector;		/* next sector to complete */
-	unsigned long hard_nr_sectors;	/* no. of sectors left to complete */
 	/* no. of sectors left to complete in the current segment */
 	unsigned int hard_cur_sectors;
 
@@ -232,7 +234,15 @@ struct request {
 	struct bio *biotail;
 
 	struct hlist_node hash;	/* merge hash */
-	struct rb_node rb_node;	/* sort/lookup */
+	/*
+	 * The rb_node is only used inside the io scheduler, requests
+	 * are pruned when moved to the dispatch queue. So let the
+	 * completion_data share space with the rb_node.
+	 */
+	union {
+		struct rb_node rb_node;	/* sort/lookup */
+		void *completion_data;
+	};
 
 	/*
 	 * two pointers are available for the IO schedulers, if they need
@@ -241,8 +251,6 @@ struct request {
 	void *elevator_private;
 	void *elevator_private2;
 
-	void *completion_data;
-
 	struct gendisk *rq_disk;
 	unsigned long start_time;
 
@@ -260,8 +268,6 @@ struct request {
 
 	unsigned short ioprio;
 
-	request_queue_t *q;
-
 	void *special;
 	char *buffer;
 
-- 
cgit v1.2.3


From fc46379daf90dce57bf765c81d3b39f55150aac2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Tue, 29 Aug 2006 09:05:44 +0200
Subject: [PATCH] cfq-iosched: kill cfq_exit_lock

cfq_exit_lock is protecting two things now:

- The per-ioc rbtree of cfq_io_contexts

- The per-cfqd linked list of cfq_io_contexts

The per-cfqd linked list can be protected by the queue lock, as it is (by
definition) per cfqd as the queue lock is.

The per-ioc rbtree is mainly used and updated by the process itself only.
The only outside use is the io priority changing. If we move the
priority changing to not browsing the rbtree, we can remove any locking
from the rbtree updates and lookup completely. Let the sys_ioprio syscall
just mark processes as having the iopriority changed and lazily update
the private cfq io contexts the next time io is queued, and we can
remove this locking as well.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/cfq-iosched.c    | 54 ++++++++++++++++----------------------------------
 block/ll_rw_blk.c      |  2 +-
 fs/ioprio.c            |  4 ++--
 include/linux/blkdev.h |  2 +-
 4 files changed, 21 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ec24284e9d39..33e0b0c5e31d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -31,8 +31,6 @@ static int cfq_slice_idle = HZ / 125;
 
 #define CFQ_KEY_ASYNC		(0)
 
-static DEFINE_SPINLOCK(cfq_exit_lock);
-
 /*
  * for the hash of cfqq inside the cfqd
  */
@@ -1084,12 +1082,6 @@ static void cfq_free_io_context(struct io_context *ioc)
 		complete(ioc_gone);
 }
 
-static void cfq_trim(struct io_context *ioc)
-{
-	ioc->set_ioprio = NULL;
-	cfq_free_io_context(ioc);
-}
-
 static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	if (unlikely(cfqq == cfqd->active_queue))
@@ -1101,6 +1093,10 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
 					 struct cfq_io_context *cic)
 {
+	list_del_init(&cic->queue_list);
+	smp_wmb();
+	cic->key = NULL;
+
 	if (cic->cfqq[ASYNC]) {
 		cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]);
 		cic->cfqq[ASYNC] = NULL;
@@ -1110,9 +1106,6 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
 		cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]);
 		cic->cfqq[SYNC] = NULL;
 	}
-
-	cic->key = NULL;
-	list_del_init(&cic->queue_list);
 }
 
 
@@ -1123,27 +1116,23 @@ static void cfq_exit_single_io_context(struct cfq_io_context *cic)
 {
 	struct cfq_data *cfqd = cic->key;
 
-	WARN_ON(!irqs_disabled());
-
 	if (cfqd) {
 		request_queue_t *q = cfqd->queue;
 
-		spin_lock(q->queue_lock);
+		spin_lock_irq(q->queue_lock);
 		__cfq_exit_single_io_context(cfqd, cic);
-		spin_unlock(q->queue_lock);
+		spin_unlock_irq(q->queue_lock);
 	}
 }
 
 static void cfq_exit_io_context(struct io_context *ioc)
 {
 	struct cfq_io_context *__cic;
-	unsigned long flags;
 	struct rb_node *n;
 
 	/*
 	 * put the reference this task is holding to the various queues
 	 */
-	spin_lock_irqsave(&cfq_exit_lock, flags);
 
 	n = rb_first(&ioc->cic_root);
 	while (n != NULL) {
@@ -1152,8 +1141,6 @@ static void cfq_exit_io_context(struct io_context *ioc)
 		cfq_exit_single_io_context(__cic);
 		n = rb_next(n);
 	}
-
-	spin_unlock_irqrestore(&cfq_exit_lock, flags);
 }
 
 static struct cfq_io_context *
@@ -1248,15 +1235,12 @@ static inline void changed_ioprio(struct cfq_io_context *cic)
 	spin_unlock(cfqd->queue->queue_lock);
 }
 
-/*
- * callback from sys_ioprio_set, irqs are disabled
- */
-static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
+static void cfq_ioc_set_ioprio(struct io_context *ioc)
 {
 	struct cfq_io_context *cic;
 	struct rb_node *n;
 
-	spin_lock(&cfq_exit_lock);
+	ioc->ioprio_changed = 0;
 
 	n = rb_first(&ioc->cic_root);
 	while (n != NULL) {
@@ -1265,10 +1249,6 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
 		changed_ioprio(cic);
 		n = rb_next(n);
 	}
-
-	spin_unlock(&cfq_exit_lock);
-
-	return 0;
 }
 
 static struct cfq_queue *
@@ -1336,10 +1316,8 @@ out:
 static void
 cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)
 {
-	spin_lock(&cfq_exit_lock);
+	WARN_ON(!list_empty(&cic->queue_list));
 	rb_erase(&cic->rb_node, &ioc->cic_root);
-	list_del_init(&cic->queue_list);
-	spin_unlock(&cfq_exit_lock);
 	kmem_cache_free(cfq_ioc_pool, cic);
 	atomic_dec(&ioc_count);
 }
@@ -1385,7 +1363,6 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
 	cic->ioc = ioc;
 	cic->key = cfqd;
 
-	ioc->set_ioprio = cfq_ioc_set_ioprio;
 restart:
 	parent = NULL;
 	p = &ioc->cic_root.rb_node;
@@ -1407,11 +1384,12 @@ restart:
 			BUG();
 	}
 
-	spin_lock(&cfq_exit_lock);
 	rb_link_node(&cic->rb_node, parent, p);
 	rb_insert_color(&cic->rb_node, &ioc->cic_root);
+
+	spin_lock_irq(cfqd->queue->queue_lock);
 	list_add(&cic->queue_list, &cfqd->cic_list);
-	spin_unlock(&cfq_exit_lock);
+	spin_unlock_irq(cfqd->queue->queue_lock);
 }
 
 /*
@@ -1441,6 +1419,10 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 
 	cfq_cic_link(cfqd, ioc, cic);
 out:
+	smp_read_barrier_depends();
+	if (unlikely(ioc->ioprio_changed))
+		cfq_ioc_set_ioprio(ioc);
+
 	return cic;
 err:
 	put_io_context(ioc);
@@ -1945,7 +1927,6 @@ static void cfq_exit_queue(elevator_t *e)
 
 	cfq_shutdown_timer_wq(cfqd);
 
-	spin_lock(&cfq_exit_lock);
 	spin_lock_irq(q->queue_lock);
 
 	if (cfqd->active_queue)
@@ -1960,7 +1941,6 @@ static void cfq_exit_queue(elevator_t *e)
 	}
 
 	spin_unlock_irq(q->queue_lock);
-	spin_unlock(&cfq_exit_lock);
 
 	cfq_shutdown_timer_wq(cfqd);
 
@@ -2149,7 +2129,7 @@ static struct elevator_type iosched_cfq = {
 		.elevator_may_queue_fn =	cfq_may_queue,
 		.elevator_init_fn =		cfq_init_queue,
 		.elevator_exit_fn =		cfq_exit_queue,
-		.trim =				cfq_trim,
+		.trim =				cfq_free_io_context,
 	},
 	.elevator_attrs =	cfq_attrs,
 	.elevator_name =	"cfq",
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index e25b4cd2dcd1..508548b834f1 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3654,7 +3654,7 @@ struct io_context *current_io_context(gfp_t gfp_flags)
 	if (ret) {
 		atomic_set(&ret->refcount, 1);
 		ret->task = current;
-		ret->set_ioprio = NULL;
+		ret->ioprio_changed = 0;
 		ret->last_waited = jiffies; /* doesn't matter... */
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->aic = NULL;
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 78b1deae3fa2..0fd1089d7bf6 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -47,8 +47,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
 	/* see wmb() in current_io_context() */
 	smp_read_barrier_depends();
 
-	if (ioc && ioc->set_ioprio)
-		ioc->set_ioprio(ioc, ioprio);
+	if (ioc)
+		ioc->ioprio_changed = 1;
 
 	task_unlock(task);
 	return 0;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a1e288069e2e..79cb9fa8034a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -90,7 +90,7 @@ struct io_context {
 	atomic_t refcount;
 	struct task_struct *task;
 
-	int (*set_ioprio)(struct io_context *, unsigned int);
+	unsigned int ioprio_changed;
 
 	/*
 	 * For request batching
-- 
cgit v1.2.3


From 4a893e837bb470867d74c05d6c6b97bba5a96185 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Sat, 22 Jul 2006 15:37:43 +0200
Subject: [PATCH] elevator: define ioc counting mechanism

None of the in-kernel primitives for handling "atomic" counting seem
to be a good fit. We need something that is essentially free for
incrementing/decrementing, while the read side may be more expensive
as we only ever need to do that when a device is removed from the
kernel.

Use a per-cpu variable for maintaining a per-cpu ioc count and define
a reading mechanism that just sums up the values.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 include/linux/elevator.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index cc81645a3e18..9c5a04f6114c 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_ELEVATOR_H
 #define _LINUX_ELEVATOR_H
 
+#include <linux/percpu.h>
+
 typedef int (elevator_merge_fn) (request_queue_t *, struct request **,
 				 struct bio *);
 
@@ -178,4 +180,27 @@ enum {
 	INIT_LIST_HEAD(&(rq)->donelist);	\
 	} while (0)
 
+/*
+ * io context count accounting
+ */
+#define elv_ioc_count_mod(name, __val)				\
+	do {							\
+		preempt_disable();				\
+		__get_cpu_var(name) += (__val);			\
+		preempt_enable();				\
+	} while (0)
+
+#define elv_ioc_count_inc(name)	elv_ioc_count_mod(name, 1)
+#define elv_ioc_count_dec(name)	elv_ioc_count_mod(name, -1)
+
+#define elv_ioc_count_read(name)				\
+({								\
+	unsigned long __val = 0;				\
+	int __cpu;						\
+	smp_wmb();						\
+	for_each_possible_cpu(__cpu)				\
+		__val += per_cpu(name, __cpu);			\
+	__val;							\
+})
+
 #endif
-- 
cgit v1.2.3


From a3b05e8f58c95dfccbf2c824d0c68e5990571f24 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Fri, 28 Jul 2006 09:36:46 +0200
Subject: [PATCH] Kill various deprecated/unused block layer defines/functions

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 drivers/block/cciss.c    |  1 -
 drivers/block/cpqarray.c |  1 -
 include/linux/blkdev.h   | 29 -----------------------------
 include/linux/fs.h       |  1 -
 4 files changed, 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 2cd3391ff878..c211065ad829 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1229,7 +1229,6 @@ static inline void complete_buffers(struct bio *bio, int status)
 		int nr_sectors = bio_sectors(bio);
 
 		bio->bi_next = NULL;
-		blk_finished_io(len);
 		bio_endio(bio, nr_sectors << 9, status ? 0 : -EIO);
 		bio = xbh;
 	}
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 78082edc14b4..4abc193314ee 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -989,7 +989,6 @@ static inline void complete_buffers(struct bio *bio, int ok)
 		xbh = bio->bi_next;
 		bio->bi_next = NULL;
 		
-		blk_finished_io(nr_sectors);
 		bio_endio(bio, nr_sectors << 9, ok ? 0 : -EIO);
 
 		bio = xbh;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 79cb9fa8034a..593386162f47 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -578,12 +578,6 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw)
 #define rq_mergeable(rq)	\
 	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
 
-/*
- * noop, requests are automagically marked as active/inactive by I/O
- * scheduler -- see elv_next_request
- */
-#define blk_queue_headactive(q, head_active)
-
 /*
  * q->prep_rq_fn return values
  */
@@ -621,11 +615,6 @@ static inline void blk_queue_bounce(request_queue_t *q, struct bio **bio)
 	if ((rq->bio))			\
 		for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
 
-struct sec_size {
-	unsigned block_size;
-	unsigned block_size_bits;
-};
-
 extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
 extern void register_disk(struct gendisk *dev);
@@ -690,16 +679,6 @@ extern void end_that_request_last(struct request *, int);
 extern void end_request(struct request *req, int uptodate);
 extern void blk_complete_request(struct request *);
 
-static inline int rq_all_done(struct request *rq, unsigned int nr_bytes)
-{
-	if (blk_fs_request(rq))
-		return (nr_bytes >= (rq->hard_nr_sectors << 9));
-	else if (blk_pc_request(rq))
-		return nr_bytes >= rq->data_len;
-
-	return 0;
-}
-
 /*
  * end_that_request_first/chunk() takes an uptodate argument. we account
  * any value <= as an io error. 0 means -EIO for compatability reasons,
@@ -807,14 +786,6 @@ static inline int queue_dma_alignment(request_queue_t *q)
 	return retval;
 }
 
-static inline int bdev_dma_aligment(struct block_device *bdev)
-{
-	return queue_dma_alignment(bdev_get_queue(bdev));
-}
-
-#define blk_finished_io(nsects)	do { } while (0)
-#define blk_started_io(nsects)	do { } while (0)
-
 /* assumes size > 256 */
 static inline unsigned int blksize_bits(unsigned int size)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6eafbe309483..9306f63bf77e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -79,7 +79,6 @@ extern int dir_notify_enable;
 #define WRITE 1
 #define READA 2		/* read-ahead  - don't block if no resources */
 #define SWRITE 3	/* for ll_rw_block() - wait for buffer lock */
-#define SPECIAL 4	/* For non-blockdevice requests in request queue */
 #define READ_SYNC	(READ | (1 << BIO_RW_SYNC))
 #define WRITE_SYNC	(WRITE | (1 << BIO_RW_SYNC))
 #define WRITE_BARRIER	((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
-- 
cgit v1.2.3


From b5deef901282628d88c784f4c9d2f0583ec3b355 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Wed, 19 Jul 2006 23:39:40 +0200
Subject: [PATCH] Make sure all block/io scheduler setups are node aware

Some were kmalloc_node(), some were still kmalloc(). Change them all to
kmalloc_node().

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/as-iosched.c     |  8 ++++----
 block/cfq-iosched.c    | 13 +++++++------
 block/elevator.c       | 11 ++++++-----
 block/ll_rw_blk.c      | 13 +++++++------
 block/noop-iosched.c   |  2 +-
 include/linux/blkdev.h |  3 +--
 6 files changed, 26 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index 8e1fef1eafc9..f6dc95489316 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -210,9 +210,9 @@ static struct as_io_context *alloc_as_io_context(void)
  * If the current task has no AS IO context then create one and initialise it.
  * Then take a ref on the task's io context and return it.
  */
-static struct io_context *as_get_io_context(void)
+static struct io_context *as_get_io_context(int node)
 {
-	struct io_context *ioc = get_io_context(GFP_ATOMIC);
+	struct io_context *ioc = get_io_context(GFP_ATOMIC, node);
 	if (ioc && !ioc->aic) {
 		ioc->aic = alloc_as_io_context();
 		if (!ioc->aic) {
@@ -1148,7 +1148,7 @@ static void as_add_request(request_queue_t *q, struct request *rq)
 
 	data_dir = rq_is_sync(rq);
 
-	rq->elevator_private = as_get_io_context();
+	rq->elevator_private = as_get_io_context(q->node);
 
 	if (RQ_IOC(rq)) {
 		as_update_iohist(ad, RQ_IOC(rq)->aic, rq);
@@ -1292,7 +1292,7 @@ static int as_may_queue(request_queue_t *q, int rw)
 	struct io_context *ioc;
 	if (ad->antic_status == ANTIC_WAIT_REQ ||
 			ad->antic_status == ANTIC_WAIT_NEXT) {
-		ioc = as_get_io_context();
+		ioc = as_get_io_context(q->node);
 		if (ad->io_context == ioc)
 			ret = ELV_MQUEUE_MUST;
 		put_io_context(ioc);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 85f1d87e86d4..0452108a932e 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1148,8 +1148,9 @@ static void cfq_exit_io_context(struct io_context *ioc)
 static struct cfq_io_context *
 cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
-	struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
+	struct cfq_io_context *cic;
 
+	cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask, cfqd->queue->node);
 	if (cic) {
 		memset(cic, 0, sizeof(*cic));
 		cic->last_end_request = jiffies;
@@ -1277,11 +1278,11 @@ retry:
 			 * free memory.
 			 */
 			spin_unlock_irq(cfqd->queue->queue_lock);
-			new_cfqq = kmem_cache_alloc(cfq_pool, gfp_mask|__GFP_NOFAIL);
+			new_cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask|__GFP_NOFAIL, cfqd->queue->node);
 			spin_lock_irq(cfqd->queue->queue_lock);
 			goto retry;
 		} else {
-			cfqq = kmem_cache_alloc(cfq_pool, gfp_mask);
+			cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask, cfqd->queue->node);
 			if (!cfqq)
 				goto out;
 		}
@@ -1407,7 +1408,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 
 	might_sleep_if(gfp_mask & __GFP_WAIT);
 
-	ioc = get_io_context(gfp_mask);
+	ioc = get_io_context(gfp_mask, cfqd->queue->node);
 	if (!ioc)
 		return NULL;
 
@@ -1955,7 +1956,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 	struct cfq_data *cfqd;
 	int i;
 
-	cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
+	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL, q->node);
 	if (!cfqd)
 		return NULL;
 
@@ -1970,7 +1971,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e)
 	INIT_LIST_HEAD(&cfqd->empty_list);
 	INIT_LIST_HEAD(&cfqd->cic_list);
 
-	cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
+	cfqd->cfq_hash = kmalloc_node(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL, q->node);
 	if (!cfqd->cfq_hash)
 		goto out_free;
 
diff --git a/block/elevator.c b/block/elevator.c
index 788d2d81994c..e643291793a4 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -161,12 +161,12 @@ __setup("elevator=", elevator_setup);
 
 static struct kobj_type elv_ktype;
 
-static elevator_t *elevator_alloc(struct elevator_type *e)
+static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)
 {
 	elevator_t *eq;
 	int i;
 
-	eq = kmalloc(sizeof(elevator_t), GFP_KERNEL);
+	eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);
 	if (unlikely(!eq))
 		goto err;
 
@@ -178,7 +178,8 @@ static elevator_t *elevator_alloc(struct elevator_type *e)
 	eq->kobj.ktype = &elv_ktype;
 	mutex_init(&eq->sysfs_lock);
 
-	eq->hash = kmalloc(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, GFP_KERNEL);
+	eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
+					GFP_KERNEL, q->node);
 	if (!eq->hash)
 		goto err;
 
@@ -224,7 +225,7 @@ int elevator_init(request_queue_t *q, char *name)
 		e = elevator_get("noop");
 	}
 
-	eq = elevator_alloc(e);
+	eq = elevator_alloc(q, e);
 	if (!eq)
 		return -ENOMEM;
 
@@ -987,7 +988,7 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 	/*
 	 * Allocate new elevator
 	 */
-	e = elevator_alloc(new_e);
+	e = elevator_alloc(q, new_e);
 	if (!e)
 		return 0;
 
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 4b7b4461e8d6..c6dfa889206c 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -39,6 +39,7 @@ static void blk_unplug_timeout(unsigned long data);
 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
 static void init_request_from_bio(struct request *req, struct bio *bio);
 static int __make_request(request_queue_t *q, struct bio *bio);
+static struct io_context *current_io_context(gfp_t gfp_flags, int node);
 
 /*
  * For the allocated request tables
@@ -2114,7 +2115,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
 
 	if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
 		if (rl->count[rw]+1 >= q->nr_requests) {
-			ioc = current_io_context(GFP_ATOMIC);
+			ioc = current_io_context(GFP_ATOMIC, q->node);
 			/*
 			 * The queue will fill after this allocation, so set
 			 * it as full, and mark this process as "batching".
@@ -2234,7 +2235,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
 			 * up to a big batch of them for a small period time.
 			 * See ioc_batching, ioc_set_batching
 			 */
-			ioc = current_io_context(GFP_NOIO);
+			ioc = current_io_context(GFP_NOIO, q->node);
 			ioc_set_batching(q, ioc);
 
 			spin_lock_irq(q->queue_lock);
@@ -3641,7 +3642,7 @@ void exit_io_context(void)
  * but since the current task itself holds a reference, the context can be
  * used in general code, so long as it stays within `current` context.
  */
-struct io_context *current_io_context(gfp_t gfp_flags)
+static struct io_context *current_io_context(gfp_t gfp_flags, int node)
 {
 	struct task_struct *tsk = current;
 	struct io_context *ret;
@@ -3650,7 +3651,7 @@ struct io_context *current_io_context(gfp_t gfp_flags)
 	if (likely(ret))
 		return ret;
 
-	ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
+	ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
 	if (ret) {
 		atomic_set(&ret->refcount, 1);
 		ret->task = current;
@@ -3674,10 +3675,10 @@ EXPORT_SYMBOL(current_io_context);
  *
  * This is always called in the context of the task which submitted the I/O.
  */
-struct io_context *get_io_context(gfp_t gfp_flags)
+struct io_context *get_io_context(gfp_t gfp_flags, int node)
 {
 	struct io_context *ret;
-	ret = current_io_context(gfp_flags);
+	ret = current_io_context(gfp_flags, node);
 	if (likely(ret))
 		atomic_inc(&ret->refcount);
 	return ret;
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index 56a7c620574f..79af43179421 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -69,7 +69,7 @@ static void *noop_init_queue(request_queue_t *q, elevator_t *e)
 {
 	struct noop_data *nd;
 
-	nd = kmalloc(sizeof(*nd), GFP_KERNEL);
+	nd = kmalloc_node(sizeof(*nd), GFP_KERNEL, q->node);
 	if (!nd)
 		return NULL;
 	INIT_LIST_HEAD(&nd->queue);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 593386162f47..6609371c303e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -104,8 +104,7 @@ struct io_context {
 
 void put_io_context(struct io_context *ioc);
 void exit_io_context(void);
-struct io_context *current_io_context(gfp_t gfp_flags);
-struct io_context *get_io_context(gfp_t gfp_flags);
+struct io_context *get_io_context(gfp_t gfp_flags, int node);
 void copy_io_context(struct io_context **pdst, struct io_context **psrc);
 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
 
-- 
cgit v1.2.3


From dc72ef4ae35c2016fb594bcc85ce871376682174 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 20 Jul 2006 14:54:05 +0200
Subject: [PATCH] Add blk_start_queueing() helper

CFQ implements this on its own now, but it's really block layer
knowledge. Tells a device queue to start dispatching requests to
the driver, taking care to unplug if needed. Also fixes the issue
where as/cfq will invoke a stopped queue, which we really don't
want.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/as-iosched.c     |  3 +--
 block/cfq-iosched.c    | 22 ++++------------------
 block/ll_rw_blk.c      | 25 ++++++++++++++++++++-----
 include/linux/blkdev.h |  1 +
 4 files changed, 26 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/block/as-iosched.c b/block/as-iosched.c
index f6dc95489316..bc13dc0b29be 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1280,8 +1280,7 @@ static void as_work_handler(void *data)
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	if (!as_queue_empty(q))
-		q->request_fn(q);
+	blk_start_queueing(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6fb1613d44d7..9f684cc66bd1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1552,19 +1552,6 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	__cfq_set_active_queue(cfqd, cfqq);
 }
 
-/*
- * should really be a ll_rw_blk.c helper
- */
-static void cfq_start_queueing(struct cfq_data *cfqd, struct cfq_queue *cfqq)
-{
-	request_queue_t *q = cfqd->queue;
-
-	if (!blk_queue_plugged(q))
-		q->request_fn(q);
-	else
-		__generic_unplug_device(q);
-}
-
 /*
  * Called when a new fs request (rq) is added (to cfqq). Check if there's
  * something we should do about it
@@ -1593,7 +1580,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		if (cic == cfqd->active_cic &&
 		    del_timer(&cfqd->idle_slice_timer)) {
 			cfq_slice_expired(cfqd, 0);
-			cfq_start_queueing(cfqd, cfqq);
+			blk_start_queueing(cfqd->queue);
 		}
 		return;
 	}
@@ -1614,7 +1601,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		if (cfq_cfqq_wait_request(cfqq)) {
 			cfq_mark_cfqq_must_dispatch(cfqq);
 			del_timer(&cfqd->idle_slice_timer);
-			cfq_start_queueing(cfqd, cfqq);
+			blk_start_queueing(cfqd->queue);
 		}
 	} else if (cfq_should_preempt(cfqd, cfqq, rq)) {
 		/*
@@ -1624,7 +1611,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		 */
 		cfq_preempt_queue(cfqd, cfqq);
 		cfq_mark_cfqq_must_dispatch(cfqq);
-		cfq_start_queueing(cfqd, cfqq);
+		blk_start_queueing(cfqd->queue);
 	}
 }
 
@@ -1832,8 +1819,7 @@ static void cfq_kick_queue(void *data)
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	blk_remove_plug(q);
-	q->request_fn(q);
+	blk_start_queueing(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index c6dfa889206c..346be9ae31f6 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2266,6 +2266,25 @@ struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_get_request);
 
+/**
+ * blk_start_queueing - initiate dispatch of requests to device
+ * @q:		request queue to kick into gear
+ *
+ * This is basically a helper to remove the need to know whether a queue
+ * is plugged or not if someone just wants to initiate dispatch of requests
+ * for this queue.
+ *
+ * The queue lock must be held with interrupts disabled.
+ */
+void blk_start_queueing(request_queue_t *q)
+{
+	if (!blk_queue_plugged(q))
+		q->request_fn(q);
+	else
+		__generic_unplug_device(q);
+}
+EXPORT_SYMBOL(blk_start_queueing);
+
 /**
  * blk_requeue_request - put a request back on queue
  * @q:		request queue where request should be inserted
@@ -2333,11 +2352,7 @@ void blk_insert_request(request_queue_t *q, struct request *rq,
 
 	drive_stat_acct(rq, rq->nr_sectors, 1);
 	__elv_add_request(q, rq, where, 0);
-
-	if (blk_queue_plugged(q))
-		__generic_unplug_device(q);
-	else
-		q->request_fn(q);
+	blk_start_queueing(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6609371c303e..5d8e288db163 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -635,6 +635,7 @@ extern void blk_stop_queue(request_queue_t *q);
 extern void blk_sync_queue(struct request_queue *q);
 extern void __blk_stop_queue(request_queue_t *q);
 extern void blk_run_queue(request_queue_t *);
+extern void blk_start_queueing(request_queue_t *);
 extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
 extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int);
 extern int blk_rq_unmap_user(struct bio *, unsigned int);
-- 
cgit v1.2.3


From 5404bc7a87b9949cf61e0174b21f80e73239ab25 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 10 Aug 2006 09:01:02 +0200
Subject: [PATCH] Allow file systems to differentiate between data and meta
 reads

We can use this information for making more intelligent priority
decisions, and it will also be useful for blktrace.

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/ll_rw_blk.c      | 2 ++
 include/linux/bio.h    | 2 ++
 include/linux/blkdev.h | 3 +++
 include/linux/fs.h     | 1 +
 4 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index e3980ec747c1..57992ae511c2 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2884,6 +2884,8 @@ static void init_request_from_bio(struct request *req, struct bio *bio)
 
 	if (bio_sync(bio))
 		req->cmd_flags |= REQ_RW_SYNC;
+	if (bio_rw_meta(bio))
+		req->cmd_flags |= REQ_RW_META;
 
 	req->errors = 0;
 	req->hard_sector = req->sector = bio->bi_sector;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 76bdaeab6f62..711c321a7011 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -148,6 +148,7 @@ struct bio {
 #define BIO_RW_BARRIER	2
 #define BIO_RW_FAILFAST	3
 #define BIO_RW_SYNC	4
+#define BIO_RW_META	5
 
 /*
  * upper 16 bits of bi_rw define the io priority of this bio
@@ -178,6 +179,7 @@ struct bio {
 #define bio_sync(bio)		((bio)->bi_rw & (1 << BIO_RW_SYNC))
 #define bio_failfast(bio)	((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
 #define bio_rw_ahead(bio)	((bio)->bi_rw & (1 << BIO_RW_AHEAD))
+#define bio_rw_meta(bio)	((bio)->bi_rw & (1 << BIO_RW_META))
 
 /*
  * will die
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5d8e288db163..2c01a90998a7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -180,6 +180,7 @@ enum rq_flag_bits {
 	__REQ_ORDERED_COLOR,	/* is before or after barrier */
 	__REQ_RW_SYNC,		/* request is sync (O_DIRECT) */
 	__REQ_ALLOCED,		/* request came from our alloc pool */
+	__REQ_RW_META,		/* metadata io request */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -200,6 +201,7 @@ enum rq_flag_bits {
 #define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
 #define REQ_RW_SYNC	(1 << __REQ_RW_SYNC)
 #define REQ_ALLOCED	(1 << __REQ_ALLOCED)
+#define REQ_RW_META	(1 << __REQ_RW_META)
 
 #define BLK_MAX_CDB	16
 
@@ -543,6 +545,7 @@ enum {
  * We regard a request as sync, if it's a READ or a SYNC write.
  */
 #define rq_is_sync(rq)		(rq_data_dir((rq)) == READ || (rq)->cmd_flags & REQ_RW_SYNC)
+#define rq_is_meta(rq)		((rq)->cmd_flags & REQ_RW_META)
 
 static inline int blk_queue_full(struct request_queue *q, int rw)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9306f63bf77e..d68c37af4dfb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -80,6 +80,7 @@ extern int dir_notify_enable;
 #define READA 2		/* read-ahead  - don't block if no resources */
 #define SWRITE 3	/* for ll_rw_block() - wait for buffer lock */
 #define READ_SYNC	(READ | (1 << BIO_RW_SYNC))
+#define READ_META	(READ | (1 << BIO_RW_META))
 #define WRITE_SYNC	(WRITE | (1 << BIO_RW_SYNC))
 #define WRITE_BARRIER	((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
 
-- 
cgit v1.2.3


From 7457e6e2d7406c7009e9ad03db1335fe93b5fb71 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Sun, 23 Jul 2006 02:12:01 +0200
Subject: [PATCH] blktrace: support for logging metadata reads

Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/blktrace.c             | 5 ++++-
 include/linux/blktrace_api.h | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/block/blktrace.c b/block/blktrace.c
index 8ff33441d8a2..2592f215a905 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -69,7 +69,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK
 /*
  * Bio action bits of interest
  */
-static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD) };
+static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) };
 
 /*
  * More could be added as needed, taking care to increment the decrementer
@@ -81,6 +81,8 @@ static u32 bio_act[5] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_AC
 	(((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
 #define trace_ahead_bit(rw)	\
 	(((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
+#define trace_meta_bit(rw)	\
+	(((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))
 
 /*
  * The worker for the various blk_add_trace*() types. Fills out a
@@ -103,6 +105,7 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 	what |= bio_act[trace_barrier_bit(rw)];
 	what |= bio_act[trace_sync_bit(rw)];
 	what |= bio_act[trace_ahead_bit(rw)];
+	what |= bio_act[trace_meta_bit(rw)];
 
 	pid = tsk->pid;
 	if (unlikely(act_log_check(bt, what, sector, pid)))
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index ea48eb1b3fd3..b99a714fcac6 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -20,6 +20,7 @@ enum blktrace_cat {
 	BLK_TC_PC	= 1 << 9,	/* pc requests */
 	BLK_TC_NOTIFY	= 1 << 10,	/* special message */
 	BLK_TC_AHEAD	= 1 << 11,	/* readahead */
+	BLK_TC_META	= 1 << 12,	/* metadata */
 
 	BLK_TC_END	= 1 << 15,	/* only 16-bits, reminder */
 };
-- 
cgit v1.2.3


From cf9a2ae8d49948f861b56e5333530e491a9da190 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 29 Aug 2006 19:05:54 +0100
Subject: [PATCH] BLOCK: Move functions out of buffer code [try #6]

Move some functions out of the buffering code that aren't strictly buffering
specific.  This is a precursor to being able to disable the block layer.

 (*) Moved some stuff out of fs/buffer.c:

     (*) The file sync and general sync stuff moved to fs/sync.c.

     (*) The superblock sync stuff moved to fs/super.c.

     (*) do_invalidatepage() moved to mm/truncate.c.

     (*) try_to_release_page() moved to mm/filemap.c.

 (*) Moved some related declarations between header files:

     (*) declarations for do_invalidatepage() and try_to_release_page() moved
     	 to linux/mm.h.

     (*) __set_page_dirty_buffers() moved to linux/buffer_head.h.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/buffer.c                 | 174 --------------------------------------------
 fs/super.c                  |  31 ++++++++
 fs/sync.c                   | 113 ++++++++++++++++++++++++++++
 include/linux/buffer_head.h |   3 +-
 include/linux/fs.h          |   1 +
 include/linux/mm.h          |   4 +-
 mm/filemap.c                |  30 ++++++++
 mm/page-writeback.c         |   1 +
 mm/truncate.c               |  24 ++++++
 9 files changed, 204 insertions(+), 177 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 3b6d701073e7..16cfbcd254f1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -159,31 +159,6 @@ int sync_blockdev(struct block_device *bdev)
 }
 EXPORT_SYMBOL(sync_blockdev);
 
-static void __fsync_super(struct super_block *sb)
-{
-	sync_inodes_sb(sb, 0);
-	DQUOT_SYNC(sb);
-	lock_super(sb);
-	if (sb->s_dirt && sb->s_op->write_super)
-		sb->s_op->write_super(sb);
-	unlock_super(sb);
-	if (sb->s_op->sync_fs)
-		sb->s_op->sync_fs(sb, 1);
-	sync_blockdev(sb->s_bdev);
-	sync_inodes_sb(sb, 1);
-}
-
-/*
- * Write out and wait upon all dirty data associated with this
- * superblock.  Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.
- */
-int fsync_super(struct super_block *sb)
-{
-	__fsync_super(sb);
-	return sync_blockdev(sb->s_bdev);
-}
-
 /*
  * Write out and wait upon all dirty data associated with this
  * device.   Filesystem data as well as the underlying block
@@ -259,118 +234,6 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb)
 }
 EXPORT_SYMBOL(thaw_bdev);
 
-/*
- * sync everything.  Start out by waking pdflush, because that writes back
- * all queues in parallel.
- */
-static void do_sync(unsigned long wait)
-{
-	wakeup_pdflush(0);
-	sync_inodes(0);		/* All mappings, inodes and their blockdevs */
-	DQUOT_SYNC(NULL);
-	sync_supers();		/* Write the superblocks */
-	sync_filesystems(0);	/* Start syncing the filesystems */
-	sync_filesystems(wait);	/* Waitingly sync the filesystems */
-	sync_inodes(wait);	/* Mappings, inodes and blockdevs, again. */
-	if (!wait)
-		printk("Emergency Sync complete\n");
-	if (unlikely(laptop_mode))
-		laptop_sync_completion();
-}
-
-asmlinkage long sys_sync(void)
-{
-	do_sync(1);
-	return 0;
-}
-
-void emergency_sync(void)
-{
-	pdflush_operation(do_sync, 0);
-}
-
-/*
- * Generic function to fsync a file.
- *
- * filp may be NULL if called via the msync of a vma.
- */
- 
-int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
-{
-	struct inode * inode = dentry->d_inode;
-	struct super_block * sb;
-	int ret, err;
-
-	/* sync the inode to buffers */
-	ret = write_inode_now(inode, 0);
-
-	/* sync the superblock to buffers */
-	sb = inode->i_sb;
-	lock_super(sb);
-	if (sb->s_op->write_super)
-		sb->s_op->write_super(sb);
-	unlock_super(sb);
-
-	/* .. finally sync the buffers to disk */
-	err = sync_blockdev(sb->s_bdev);
-	if (!ret)
-		ret = err;
-	return ret;
-}
-
-long do_fsync(struct file *file, int datasync)
-{
-	int ret;
-	int err;
-	struct address_space *mapping = file->f_mapping;
-
-	if (!file->f_op || !file->f_op->fsync) {
-		/* Why?  We can still call filemap_fdatawrite */
-		ret = -EINVAL;
-		goto out;
-	}
-
-	ret = filemap_fdatawrite(mapping);
-
-	/*
-	 * We need to protect against concurrent writers, which could cause
-	 * livelocks in fsync_buffers_list().
-	 */
-	mutex_lock(&mapping->host->i_mutex);
-	err = file->f_op->fsync(file, file->f_dentry, datasync);
-	if (!ret)
-		ret = err;
-	mutex_unlock(&mapping->host->i_mutex);
-	err = filemap_fdatawait(mapping);
-	if (!ret)
-		ret = err;
-out:
-	return ret;
-}
-
-static long __do_fsync(unsigned int fd, int datasync)
-{
-	struct file *file;
-	int ret = -EBADF;
-
-	file = fget(fd);
-	if (file) {
-		ret = do_fsync(file, datasync);
-		fput(file);
-	}
-	return ret;
-}
-
-asmlinkage long sys_fsync(unsigned int fd)
-{
-	return __do_fsync(fd, 0);
-}
-
-asmlinkage long sys_fdatasync(unsigned int fd)
-{
-	return __do_fsync(fd, 1);
-}
-
 /*
  * Various filesystems appear to want __find_get_block to be non-blocking.
  * But it's the page lock which protects the buffers.  To get around this,
@@ -1550,35 +1413,6 @@ static void discard_buffer(struct buffer_head * bh)
 	unlock_buffer(bh);
 }
 
-/**
- * try_to_release_page() - release old fs-specific metadata on a page
- *
- * @page: the page which the kernel is trying to free
- * @gfp_mask: memory allocation flags (and I/O mode)
- *
- * The address_space is to try to release any data against the page
- * (presumably at page->private).  If the release was successful, return `1'.
- * Otherwise return zero.
- *
- * The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
- *
- * NOTE: @gfp_mask may go away, and this function may become non-blocking.
- */
-int try_to_release_page(struct page *page, gfp_t gfp_mask)
-{
-	struct address_space * const mapping = page->mapping;
-
-	BUG_ON(!PageLocked(page));
-	if (PageWriteback(page))
-		return 0;
-	
-	if (mapping && mapping->a_ops->releasepage)
-		return mapping->a_ops->releasepage(page, gfp_mask);
-	return try_to_free_buffers(page);
-}
-EXPORT_SYMBOL(try_to_release_page);
-
 /**
  * block_invalidatepage - invalidate part of all of a buffer-backed page
  *
@@ -1630,14 +1464,6 @@ out:
 }
 EXPORT_SYMBOL(block_invalidatepage);
 
-void do_invalidatepage(struct page *page, unsigned long offset)
-{
-	void (*invalidatepage)(struct page *, unsigned long);
-	invalidatepage = page->mapping->a_ops->invalidatepage ? :
-		block_invalidatepage;
-	(*invalidatepage)(page, offset);
-}
-
 /*
  * We attach and possibly dirty the buffers atomically wrt
  * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
diff --git a/fs/super.c b/fs/super.c
index 6987824d0dce..15671cd048b1 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -220,6 +220,37 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
 	return 0;
 }
 
+/*
+ * Write out and wait upon all dirty data associated with this
+ * superblock.  Filesystem data as well as the underlying block
+ * device.  Takes the superblock lock.  Requires a second blkdev
+ * flush by the caller to complete the operation.
+ */
+void __fsync_super(struct super_block *sb)
+{
+	sync_inodes_sb(sb, 0);
+	DQUOT_SYNC(sb);
+	lock_super(sb);
+	if (sb->s_dirt && sb->s_op->write_super)
+		sb->s_op->write_super(sb);
+	unlock_super(sb);
+	if (sb->s_op->sync_fs)
+		sb->s_op->sync_fs(sb, 1);
+	sync_blockdev(sb->s_bdev);
+	sync_inodes_sb(sb, 1);
+}
+
+/*
+ * Write out and wait upon all dirty data associated with this
+ * superblock.  Filesystem data as well as the underlying block
+ * device.  Takes the superblock lock.
+ */
+int fsync_super(struct super_block *sb)
+{
+	__fsync_super(sb);
+	return sync_blockdev(sb->s_bdev);
+}
+
 /**
  *	generic_shutdown_super	-	common helper for ->kill_sb()
  *	@sb: superblock to kill
diff --git a/fs/sync.c b/fs/sync.c
index 955aef04da28..1de747b5ddb9 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -10,10 +10,123 @@
 #include <linux/syscalls.h>
 #include <linux/linkage.h>
 #include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
 
 #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
 			SYNC_FILE_RANGE_WAIT_AFTER)
 
+/*
+ * sync everything.  Start out by waking pdflush, because that writes back
+ * all queues in parallel.
+ */
+static void do_sync(unsigned long wait)
+{
+	wakeup_pdflush(0);
+	sync_inodes(0);		/* All mappings, inodes and their blockdevs */
+	DQUOT_SYNC(NULL);
+	sync_supers();		/* Write the superblocks */
+	sync_filesystems(0);	/* Start syncing the filesystems */
+	sync_filesystems(wait);	/* Waitingly sync the filesystems */
+	sync_inodes(wait);	/* Mappings, inodes and blockdevs, again. */
+	if (!wait)
+		printk("Emergency Sync complete\n");
+	if (unlikely(laptop_mode))
+		laptop_sync_completion();
+}
+
+asmlinkage long sys_sync(void)
+{
+	do_sync(1);
+	return 0;
+}
+
+void emergency_sync(void)
+{
+	pdflush_operation(do_sync, 0);
+}
+
+/*
+ * Generic function to fsync a file.
+ *
+ * filp may be NULL if called via the msync of a vma.
+ */
+int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+{
+	struct inode * inode = dentry->d_inode;
+	struct super_block * sb;
+	int ret, err;
+
+	/* sync the inode to buffers */
+	ret = write_inode_now(inode, 0);
+
+	/* sync the superblock to buffers */
+	sb = inode->i_sb;
+	lock_super(sb);
+	if (sb->s_op->write_super)
+		sb->s_op->write_super(sb);
+	unlock_super(sb);
+
+	/* .. finally sync the buffers to disk */
+	err = sync_blockdev(sb->s_bdev);
+	if (!ret)
+		ret = err;
+	return ret;
+}
+
+long do_fsync(struct file *file, int datasync)
+{
+	int ret;
+	int err;
+	struct address_space *mapping = file->f_mapping;
+
+	if (!file->f_op || !file->f_op->fsync) {
+		/* Why?  We can still call filemap_fdatawrite */
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = filemap_fdatawrite(mapping);
+
+	/*
+	 * We need to protect against concurrent writers, which could cause
+	 * livelocks in fsync_buffers_list().
+	 */
+	mutex_lock(&mapping->host->i_mutex);
+	err = file->f_op->fsync(file, file->f_dentry, datasync);
+	if (!ret)
+		ret = err;
+	mutex_unlock(&mapping->host->i_mutex);
+	err = filemap_fdatawait(mapping);
+	if (!ret)
+		ret = err;
+out:
+	return ret;
+}
+
+static long __do_fsync(unsigned int fd, int datasync)
+{
+	struct file *file;
+	int ret = -EBADF;
+
+	file = fget(fd);
+	if (file) {
+		ret = do_fsync(file, datasync);
+		fput(file);
+	}
+	return ret;
+}
+
+asmlinkage long sys_fsync(unsigned int fd)
+{
+	return __do_fsync(fd, 0);
+}
+
+asmlinkage long sys_fdatasync(unsigned int fd)
+{
+	return __do_fsync(fd, 1);
+}
+
 /*
  * sys_sync_file_range() permits finely controlled syncing over a segment of
  * a file in the range offset .. (offset+nbytes-1) inclusive.  If nbytes is
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 737e407d0cd1..64b508e35d2a 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -190,9 +190,7 @@ extern int buffer_heads_over_limit;
  * Generic address_space_operations implementations for buffer_head-backed
  * address_spaces.
  */
-int try_to_release_page(struct page * page, gfp_t gfp_mask);
 void block_invalidatepage(struct page *page, unsigned long offset);
-void do_invalidatepage(struct page *page, unsigned long offset);
 int block_write_full_page(struct page *page, get_block_t *get_block,
 				struct writeback_control *wbc);
 int block_read_full_page(struct page*, get_block_t*);
@@ -302,4 +300,5 @@ static inline void lock_buffer(struct buffer_head *bh)
 		__lock_buffer(bh);
 }
 
+extern int __set_page_dirty_buffers(struct page *page);
 #endif /* _LINUX_BUFFER_HEAD_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d68c37af4dfb..1728142ec4b6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1546,6 +1546,7 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping,
 extern long do_fsync(struct file *file, int datasync);
 extern void sync_supers(void);
 extern void sync_filesystems(int wait);
+extern void __fsync_super(struct super_block *sb);
 extern void emergency_sync(void);
 extern void emergency_remount(void);
 extern int do_remount_sb(struct super_block *sb, int flags,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7b703b6d4358..4edf1934e5ca 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -743,7 +743,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long
 		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
 void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);
 
-int __set_page_dirty_buffers(struct page *page);
+extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
+extern void do_invalidatepage(struct page *page, unsigned long offset);
+
 int __set_page_dirty_nobuffers(struct page *page);
 int redirty_page_for_writepage(struct writeback_control *wbc,
 				struct page *page);
diff --git a/mm/filemap.c b/mm/filemap.c
index 3277f3b23524..d6846de08887 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2491,3 +2491,33 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	}
 	return retval;
 }
+
+/**
+ * try_to_release_page() - release old fs-specific metadata on a page
+ *
+ * @page: the page which the kernel is trying to free
+ * @gfp_mask: memory allocation flags (and I/O mode)
+ *
+ * The address_space is to try to release any data against the page
+ * (presumably at page->private).  If the release was successful, return `1'.
+ * Otherwise return zero.
+ *
+ * The @gfp_mask argument specifies whether I/O may be performed to release
+ * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
+ *
+ * NOTE: @gfp_mask may go away, and this function may become non-blocking.
+ */
+int try_to_release_page(struct page *page, gfp_t gfp_mask)
+{
+	struct address_space * const mapping = page->mapping;
+
+	BUG_ON(!PageLocked(page));
+	if (PageWriteback(page))
+		return 0;
+
+	if (mapping && mapping->a_ops->releasepage)
+		return mapping->a_ops->releasepage(page, gfp_mask);
+	return try_to_free_buffers(page);
+}
+
+EXPORT_SYMBOL(try_to_release_page);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 488b7088557c..9fdcc7903956 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -30,6 +30,7 @@
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
+#include <linux/buffer_head.h>
 
 /*
  * The maximum number of pages to writeout in a single bdflush/kupdate
diff --git a/mm/truncate.c b/mm/truncate.c
index a654928323dc..cd3e34b816db 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -17,6 +17,30 @@
 				   do_invalidatepage */
 
 
+/**
+ * do_invalidatepage - invalidate part of all of a page
+ * @page: the page which is affected
+ * @offset: the index of the truncation point
+ *
+ * do_invalidatepage() is called when all or part of the page has become
+ * invalidated by a truncate operation.
+ *
+ * do_invalidatepage() does not have to release all buffers, but it must
+ * ensure that no dirty buffer is left outside @offset and that no I/O
+ * is underway against any of the blocks which are outside the truncation
+ * point.  Because the caller is about to free (and possibly reuse) those
+ * blocks on-disk.
+ */
+void do_invalidatepage(struct page *page, unsigned long offset)
+{
+	void (*invalidatepage)(struct page *, unsigned long);
+	invalidatepage = page->mapping->a_ops->invalidatepage;
+	if (!invalidatepage)
+		invalidatepage = block_invalidatepage;
+	if (invalidatepage)
+		(*invalidatepage)(page, offset);
+}
+
 static inline void truncate_partial_page(struct page *page, unsigned partial)
 {
 	memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);
-- 
cgit v1.2.3


From 0d67a46df0125e20d14f12dbd3646f1f1bf23e8c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 29 Aug 2006 19:05:56 +0100
Subject: [PATCH] BLOCK: Remove duplicate declaration of exit_io_context() [try
 #6]

Remove the duplicate declaration of exit_io_context() from linux/sched.h.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/sched.h | 1 -
 kernel/exit.c         | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a06fc89cf6e5..fc4a9873ec10 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -710,7 +710,6 @@ extern unsigned int max_cache_size;
 
 
 struct io_context;			/* See blkdev.h */
-void exit_io_context(void);
 struct cpuset;
 
 #define NGROUPS_SMALL		32
diff --git a/kernel/exit.c b/kernel/exit.c
index 2e4c13cba95a..c189de2927ab 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -38,6 +38,7 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/audit.h> /* for audit_free() */
 #include <linux/resource.h>
+#include <linux/blkdev.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
-- 
cgit v1.2.3


From 07f3f05c1e3052b8656129b2a5aca9f888241a34 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 30 Sep 2006 20:52:18 +0200
Subject: [PATCH] BLOCK: Move extern declarations out of fs/*.c into header
 files [try #6]

Create a new header file, fs/internal.h, for common definitions local to the
sources in the fs/ directory.

Move extern definitions that should be in header files from fs/*.c to
fs/internal.h or other main header files where they span directories.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/mips/kernel/signal_n32.c |  4 ++--
 fs/binfmt_elf.c               |  1 -
 fs/block_dev.c                |  1 +
 fs/char_dev.c                 |  1 +
 fs/compat.c                   | 10 +++-------
 fs/compat_ioctl.c             |  2 --
 fs/dcache.c                   |  4 +---
 fs/fs-writeback.c             |  3 +--
 fs/internal.h                 | 36 ++++++++++++++++++++++++++++++++++++
 fs/namespace.c                |  3 +--
 include/linux/ramfs.h         |  1 +
 include/linux/tty.h           |  3 +++
 kernel/compat.c               |  2 ++
 13 files changed, 52 insertions(+), 19 deletions(-)
 create mode 100644 fs/internal.h

(limited to 'include/linux')

diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c
index 477c5334ec1b..50c17eaa7f25 100644
--- a/arch/mips/kernel/signal_n32.c
+++ b/arch/mips/kernel/signal_n32.c
@@ -42,6 +42,8 @@
 
 #include "signal-common.h"
 
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
 /*
  * Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
  */
@@ -81,8 +83,6 @@ struct rt_sigframe_n32 {
 #endif
 };
 
-extern void sigset_from_compat (sigset_t *set, compat_sigset_t *compat);
-
 save_static_function(sysn32_rt_sigsuspend);
 __attribute_used__ noinline static int
 _sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6eb48e1446ec..bad52433de69 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -46,7 +46,6 @@
 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
 static int load_elf_library(struct file *);
 static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
-extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
 
 #ifndef elf_addr_t
 #define elf_addr_t unsigned long
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4346468139e8..20f7333ba372 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -22,6 +22,7 @@
 #include <linux/uio.h>
 #include <linux/namei.h>
 #include <asm/uaccess.h>
+#include "internal.h"
 
 struct bdev_inode {
 	struct block_device bdev;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 1f3285affa39..a885f46ca001 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -24,6 +24,7 @@
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
 #endif
+#include "internal.h"
 
 /*
  * capabilities for /dev/mem, /dev/kmem and similar directly mappable character
diff --git a/fs/compat.c b/fs/compat.c
index ce982f6e8c80..122b4e3992b5 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -52,11 +52,12 @@
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 #include <asm/ioctls.h>
-
-extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+#include "internal.h"
 
 int compat_log = 1;
 
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
 int compat_printk(const char *fmt, ...)
 {
 	va_list ap;
@@ -313,9 +314,6 @@ out:
 #define IOCTL_HASHSIZE 256
 static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE];
 
-extern struct ioctl_trans ioctl_start[];
-extern int ioctl_table_size;
-
 static inline unsigned long ioctl32_hash(unsigned long cmd)
 {
 	return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE;
@@ -838,8 +836,6 @@ static int do_nfs4_super_data_conv(void *raw_data)
 	return 0;
 }
 
-extern int copy_mount_options (const void __user *, unsigned long *);
-
 #define SMBFS_NAME      "smbfs"
 #define NCPFS_NAME      "ncpfs"
 #define NFS4_NAME	"nfs4"
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 4063a9396977..ab74c9bd55fe 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1279,8 +1279,6 @@ static int loop_status(unsigned int fd, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
-extern int tty_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg);
-
 #ifdef CONFIG_VT
 
 static int vt_check(struct file *file)
diff --git a/fs/dcache.c b/fs/dcache.c
index 17b392a2049e..fc2faa44f8d1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -32,6 +32,7 @@
 #include <linux/seqlock.h>
 #include <linux/swap.h>
 #include <linux/bootmem.h>
+#include "internal.h"
 
 
 int sysctl_vfs_cache_pressure __read_mostly = 100;
@@ -1877,9 +1878,6 @@ kmem_cache_t *filp_cachep __read_mostly;
 
 EXPORT_SYMBOL(d_genocide);
 
-extern void bdev_cache_init(void);
-extern void chrdev_init(void);
-
 void __init vfs_caches_init_early(void)
 {
 	dcache_init_early();
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 892643dc9af1..0639024d83a9 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -22,8 +22,7 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
-
-extern struct super_block *blockdev_superblock;
+#include "internal.h"
 
 /**
  *	__mark_inode_dirty -	internal function
diff --git a/fs/internal.h b/fs/internal.h
new file mode 100644
index 000000000000..c21ecd37b1e7
--- /dev/null
+++ b/fs/internal.h
@@ -0,0 +1,36 @@
+/* fs/ internal definitions
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/ioctl32.h>
+
+/*
+ * block_dev.c
+ */
+extern struct super_block *blockdev_superblock;
+extern void __init bdev_cache_init(void);
+
+/*
+ * char_dev.c
+ */
+extern void __init chrdev_init(void);
+
+/*
+ * compat_ioctl.c
+ */
+#ifdef CONFIG_COMPAT
+extern struct ioctl_trans ioctl_start[];
+extern int ioctl_table_size;
+#endif
+
+/*
+ * namespace.c
+ */
+extern int copy_mount_options(const void __user *, unsigned long *);
diff --git a/fs/namespace.c b/fs/namespace.c
index 6ede3a539ed8..66d921e14fee 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -24,12 +24,11 @@
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/mount.h>
+#include <linux/ramfs.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include "pnode.h"
 
-extern int __init init_rootfs(void);
-
 /* spinlock for vfsmount related operations, inplace of dcache_lock */
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
 
diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h
index 00b340ba6612..b160fb18e8d6 100644
--- a/include/linux/ramfs.h
+++ b/include/linux/ramfs.h
@@ -17,5 +17,6 @@ extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma);
 
 extern const struct file_operations ramfs_file_operations;
 extern struct vm_operations_struct generic_file_vm_ops;
+extern int __init init_rootfs(void);
 
 #endif
diff --git a/include/linux/tty.h b/include/linux/tty.h
index ea4c2605f8da..44091c0db0b4 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -307,6 +307,9 @@ extern void tty_ldisc_put(int);
 extern void tty_wakeup(struct tty_struct *tty);
 extern void tty_ldisc_flush(struct tty_struct *tty);
 
+extern int tty_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+		     unsigned long arg);
+
 extern struct mutex tty_mutex;
 
 /* n_tty.c */
diff --git a/kernel/compat.c b/kernel/compat.c
index 75573e5d27b0..b4fbd838cd77 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -26,6 +26,8 @@
 
 #include <asm/uaccess.h>
 
+extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
+
 int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
 {
 	return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
-- 
cgit v1.2.3


From 811d736f9e8013966e1a5a930c0db09508bdbb15 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 29 Aug 2006 19:06:09 +0100
Subject: [PATCH] BLOCK: Dissociate generic_writepages() from mpage stuff [try
 #6]

Dissociate the generic_writepages() function from the mpage stuff, moving its
declaration to linux/mm.h and actually emitting a full implementation into
mm/page-writeback.c.

The implementation is a partial duplicate of mpage_writepages() with all BIO
references removed.

It is used by NFS to do writeback.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/block_dev.c            |   1 +
 fs/mpage.c                |   2 +
 include/linux/mpage.h     |   6 ---
 include/linux/writeback.h |   2 +
 mm/page-writeback.c       | 134 ++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 139 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 20f7333ba372..335c38bb86eb 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/blkpg.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 #include <linux/mpage.h>
 #include <linux/mount.h>
 #include <linux/uio.h>
diff --git a/fs/mpage.c b/fs/mpage.c
index 1e4598247d0b..692a3e578fc8 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -693,6 +693,8 @@ out:
  * the call was made get new I/O started against them.  If wbc->sync_mode is
  * WB_SYNC_ALL then we were called for data integrity and we must wait for
  * existing IO to complete.
+ *
+ * If you fix this you should check generic_writepages() also!
  */
 int
 mpage_writepages(struct address_space *mapping,
diff --git a/include/linux/mpage.h b/include/linux/mpage.h
index 3ca880463c47..517c098fde20 100644
--- a/include/linux/mpage.h
+++ b/include/linux/mpage.h
@@ -20,9 +20,3 @@ int mpage_writepages(struct address_space *mapping,
 		struct writeback_control *wbc, get_block_t get_block);
 int mpage_writepage(struct page *page, get_block_t *get_block,
 		struct writeback_control *wbc);
-
-static inline int
-generic_writepages(struct address_space *mapping, struct writeback_control *wbc)
-{
-	return mpage_writepages(mapping, wbc, NULL);
-}
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 9d4074ecd0cd..4f4d98addb44 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -111,6 +111,8 @@ balance_dirty_pages_ratelimited(struct address_space *mapping)
 }
 
 int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
+extern int generic_writepages(struct address_space *mapping,
+			      struct writeback_control *wbc);
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
 int sync_page_range(struct inode *inode, struct address_space *mapping,
 			loff_t pos, loff_t count);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 9fdcc7903956..ecf27839c203 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -31,6 +31,7 @@
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
 #include <linux/buffer_head.h>
+#include <linux/pagevec.h>
 
 /*
  * The maximum number of pages to writeout in a single bdflush/kupdate
@@ -551,6 +552,139 @@ void __init page_writeback_init(void)
 	register_cpu_notifier(&ratelimit_nb);
 }
 
+/**
+ * generic_writepages - walk the list of dirty pages of the given
+ *                      address space and writepage() all of them.
+ *
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ *
+ * This is a library function, which implements the writepages()
+ * address_space_operation.
+ *
+ * If a page is already under I/O, generic_writepages() skips it, even
+ * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
+ * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
+ * and msync() need to guarantee that all the data which was dirty at the time
+ * the call was made get new I/O started against them.  If wbc->sync_mode is
+ * WB_SYNC_ALL then we were called for data integrity and we must wait for
+ * existing IO to complete.
+ *
+ * Derived from mpage_writepages() - if you fix this you should check that
+ * also!
+ */
+int generic_writepages(struct address_space *mapping,
+		       struct writeback_control *wbc)
+{
+	struct backing_dev_info *bdi = mapping->backing_dev_info;
+	int ret = 0;
+	int done = 0;
+	int (*writepage)(struct page *page, struct writeback_control *wbc);
+	struct pagevec pvec;
+	int nr_pages;
+	pgoff_t index;
+	pgoff_t end;		/* Inclusive */
+	int scanned = 0;
+	int range_whole = 0;
+
+	if (wbc->nonblocking && bdi_write_congested(bdi)) {
+		wbc->encountered_congestion = 1;
+		return 0;
+	}
+
+	writepage = mapping->a_ops->writepage;
+
+	/* deal with chardevs and other special file */
+	if (!writepage)
+		return 0;
+
+	pagevec_init(&pvec, 0);
+	if (wbc->range_cyclic) {
+		index = mapping->writeback_index; /* Start from prev offset */
+		end = -1;
+	} else {
+		index = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+			range_whole = 1;
+		scanned = 1;
+	}
+retry:
+	while (!done && (index <= end) &&
+	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+					      PAGECACHE_TAG_DIRTY,
+					      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+		unsigned i;
+
+		scanned = 1;
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			/*
+			 * At this point we hold neither mapping->tree_lock nor
+			 * lock on the page itself: the page may be truncated or
+			 * invalidated (changing page->mapping to NULL), or even
+			 * swizzled back from swapper_space to tmpfs file
+			 * mapping
+			 */
+			lock_page(page);
+
+			if (unlikely(page->mapping != mapping)) {
+				unlock_page(page);
+				continue;
+			}
+
+			if (!wbc->range_cyclic && page->index > end) {
+				done = 1;
+				unlock_page(page);
+				continue;
+			}
+
+			if (wbc->sync_mode != WB_SYNC_NONE)
+				wait_on_page_writeback(page);
+
+			if (PageWriteback(page) ||
+			    !clear_page_dirty_for_io(page)) {
+				unlock_page(page);
+				continue;
+			}
+
+			ret = (*writepage)(page, wbc);
+			if (ret) {
+				if (ret == -ENOSPC)
+					set_bit(AS_ENOSPC, &mapping->flags);
+				else
+					set_bit(AS_EIO, &mapping->flags);
+			}
+
+			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
+				unlock_page(page);
+			if (ret || (--(wbc->nr_to_write) <= 0))
+				done = 1;
+			if (wbc->nonblocking && bdi_write_congested(bdi)) {
+				wbc->encountered_congestion = 1;
+				done = 1;
+			}
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+	if (!scanned && !done) {
+		/*
+		 * We hit the last page and there is more work to be done: wrap
+		 * back to the start of the file
+		 */
+		scanned = 1;
+		index = 0;
+		goto retry;
+	}
+	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+		mapping->writeback_index = index;
+	return ret;
+}
+
+EXPORT_SYMBOL(generic_writepages);
+
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
 	int ret;
-- 
cgit v1.2.3


From 863d5b822c02d0e7215fb84ca79e9f8c3e35f04e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 29 Aug 2006 19:06:14 +0100
Subject: [PATCH] BLOCK: Move the loop device ioctl compat stuff to the loop
 driver [try #6]

Move the loop device ioctl compat stuff from fs/compat_ioctl.c to the loop
driver so that the loop header file doesn't need to be included.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c         | 160 +++++++++++++++++++++++++++++++++++++++++++
 fs/compat_ioctl.c            |  68 ------------------
 include/linux/compat_ioctl.h |   6 --
 3 files changed, 160 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 68b0471ad5a6..d6bb8da955a2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -66,6 +66,7 @@
 #include <linux/swap.h>
 #include <linux/slab.h>
 #include <linux/loop.h>
+#include <linux/compat.h>
 #include <linux/suspend.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>		/* for invalidate_bdev() */
@@ -1165,6 +1166,162 @@ static int lo_ioctl(struct inode * inode, struct file * file,
 	return err;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_loop_info {
+	compat_int_t	lo_number;      /* ioctl r/o */
+	compat_dev_t	lo_device;      /* ioctl r/o */
+	compat_ulong_t	lo_inode;       /* ioctl r/o */
+	compat_dev_t	lo_rdevice;     /* ioctl r/o */
+	compat_int_t	lo_offset;
+	compat_int_t	lo_encrypt_type;
+	compat_int_t	lo_encrypt_key_size;    /* ioctl w/o */
+	compat_int_t	lo_flags;       /* ioctl r/o */
+	char		lo_name[LO_NAME_SIZE];
+	unsigned char	lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
+	compat_ulong_t	lo_init[2];
+	char		reserved[4];
+};
+
+/*
+ * Transfer 32-bit compatibility structure in userspace to 64-bit loop info
+ * - noinlined to reduce stack space usage in main part of driver
+ */
+static noinline int
+loop_info64_from_compat(const struct compat_loop_info *arg,
+			struct loop_info64 *info64)
+{
+	struct compat_loop_info info;
+
+	if (copy_from_user(&info, arg, sizeof(info)))
+		return -EFAULT;
+
+	memset(info64, 0, sizeof(*info64));
+	info64->lo_number = info.lo_number;
+	info64->lo_device = info.lo_device;
+	info64->lo_inode = info.lo_inode;
+	info64->lo_rdevice = info.lo_rdevice;
+	info64->lo_offset = info.lo_offset;
+	info64->lo_sizelimit = 0;
+	info64->lo_encrypt_type = info.lo_encrypt_type;
+	info64->lo_encrypt_key_size = info.lo_encrypt_key_size;
+	info64->lo_flags = info.lo_flags;
+	info64->lo_init[0] = info.lo_init[0];
+	info64->lo_init[1] = info.lo_init[1];
+	if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
+		memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE);
+	else
+		memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE);
+	memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE);
+	return 0;
+}
+
+/*
+ * Transfer 64-bit loop info to 32-bit compatibility structure in userspace
+ * - noinlined to reduce stack space usage in main part of driver
+ */
+static noinline int
+loop_info64_to_compat(const struct loop_info64 *info64,
+		      struct compat_loop_info __user *arg)
+{
+	struct compat_loop_info info;
+
+	memset(&info, 0, sizeof(info));
+	info.lo_number = info64->lo_number;
+	info.lo_device = info64->lo_device;
+	info.lo_inode = info64->lo_inode;
+	info.lo_rdevice = info64->lo_rdevice;
+	info.lo_offset = info64->lo_offset;
+	info.lo_encrypt_type = info64->lo_encrypt_type;
+	info.lo_encrypt_key_size = info64->lo_encrypt_key_size;
+	info.lo_flags = info64->lo_flags;
+	info.lo_init[0] = info64->lo_init[0];
+	info.lo_init[1] = info64->lo_init[1];
+	if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
+		memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
+	else
+		memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE);
+	memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
+
+	/* error in case values were truncated */
+	if (info.lo_device != info64->lo_device ||
+	    info.lo_rdevice != info64->lo_rdevice ||
+	    info.lo_inode != info64->lo_inode ||
+	    info.lo_offset != info64->lo_offset ||
+	    info.lo_init[0] != info64->lo_init[0] ||
+	    info.lo_init[1] != info64->lo_init[1])
+		return -EOVERFLOW;
+
+	if (copy_to_user(arg, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
+}
+
+static int
+loop_set_status_compat(struct loop_device *lo,
+		       const struct compat_loop_info __user *arg)
+{
+	struct loop_info64 info64;
+	int ret;
+
+	ret = loop_info64_from_compat(arg, &info64);
+	if (ret < 0)
+		return ret;
+	return loop_set_status(lo, &info64);
+}
+
+static int
+loop_get_status_compat(struct loop_device *lo,
+		       struct compat_loop_info __user *arg)
+{
+	struct loop_info64 info64;
+	int err = 0;
+
+	if (!arg)
+		err = -EINVAL;
+	if (!err)
+		err = loop_get_status(lo, &info64);
+	if (!err)
+		err = loop_info64_to_compat(&info64, arg);
+	return err;
+}
+
+static long lo_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
+	int err;
+
+	lock_kernel();
+	switch(cmd) {
+	case LOOP_SET_STATUS:
+		mutex_lock(&lo->lo_ctl_mutex);
+		err = loop_set_status_compat(
+			lo, (const struct compat_loop_info __user *) arg);
+		mutex_unlock(&lo->lo_ctl_mutex);
+		break;
+	case LOOP_GET_STATUS:
+		mutex_lock(&lo->lo_ctl_mutex);
+		err = loop_get_status_compat(
+			lo, (struct compat_loop_info __user *) arg);
+		mutex_unlock(&lo->lo_ctl_mutex);
+		break;
+	case LOOP_CLR_FD:
+	case LOOP_GET_STATUS64:
+	case LOOP_SET_STATUS64:
+		arg = (unsigned long) compat_ptr(arg);
+	case LOOP_SET_FD:
+	case LOOP_CHANGE_FD:
+		err = lo_ioctl(inode, file, cmd, arg);
+		break;
+	default:
+		err = -ENOIOCTLCMD;
+		break;
+	}
+	unlock_kernel();
+	return err;
+}
+#endif
+
 static int lo_open(struct inode *inode, struct file *file)
 {
 	struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
@@ -1192,6 +1349,9 @@ static struct block_device_operations lo_fops = {
 	.open =		lo_open,
 	.release =	lo_release,
 	.ioctl =	lo_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl =	lo_compat_ioctl,
+#endif
 };
 
 /*
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index ab74c9bd55fe..3b0cf7fbd95a 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -40,7 +40,6 @@
 #include <linux/if_pppox.h>
 #include <linux/mtio.h>
 #include <linux/cdrom.h>
-#include <linux/loop.h>
 #include <linux/auto_fs.h>
 #include <linux/auto_fs4.h>
 #include <linux/tty.h>
@@ -1214,71 +1213,6 @@ static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar
 	return err;
 }
 
-struct loop_info32 {
-	compat_int_t	lo_number;      /* ioctl r/o */
-	compat_dev_t	lo_device;      /* ioctl r/o */
-	compat_ulong_t	lo_inode;       /* ioctl r/o */
-	compat_dev_t	lo_rdevice;     /* ioctl r/o */
-	compat_int_t	lo_offset;
-	compat_int_t	lo_encrypt_type;
-	compat_int_t	lo_encrypt_key_size;    /* ioctl w/o */
-	compat_int_t	lo_flags;       /* ioctl r/o */
-	char		lo_name[LO_NAME_SIZE];
-	unsigned char	lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
-	compat_ulong_t	lo_init[2];
-	char		reserved[4];
-};
-
-static int loop_status(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-	mm_segment_t old_fs = get_fs();
-	struct loop_info l;
-	struct loop_info32 __user *ul;
-	int err = -EINVAL;
-
-	ul = compat_ptr(arg);
-	switch(cmd) {
-	case LOOP_SET_STATUS:
-		err = get_user(l.lo_number, &ul->lo_number);
-		err |= __get_user(l.lo_device, &ul->lo_device);
-		err |= __get_user(l.lo_inode, &ul->lo_inode);
-		err |= __get_user(l.lo_rdevice, &ul->lo_rdevice);
-		err |= __copy_from_user(&l.lo_offset, &ul->lo_offset,
-		        8 + (unsigned long)l.lo_init - (unsigned long)&l.lo_offset);
-		if (err) {
-			err = -EFAULT;
-		} else {
-			set_fs (KERNEL_DS);
-			err = sys_ioctl (fd, cmd, (unsigned long)&l);
-			set_fs (old_fs);
-		}
-		break;
-	case LOOP_GET_STATUS:
-		set_fs (KERNEL_DS);
-		err = sys_ioctl (fd, cmd, (unsigned long)&l);
-		set_fs (old_fs);
-		if (!err) {
-			err = put_user(l.lo_number, &ul->lo_number);
-			err |= __put_user(l.lo_device, &ul->lo_device);
-			err |= __put_user(l.lo_inode, &ul->lo_inode);
-			err |= __put_user(l.lo_rdevice, &ul->lo_rdevice);
-			err |= __copy_to_user(&ul->lo_offset, &l.lo_offset,
-				(unsigned long)l.lo_init - (unsigned long)&l.lo_offset);
-			if (err)
-				err = -EFAULT;
-		}
-		break;
-	default: {
-		static int count;
-		if (++count <= 20)
-			printk("%s: Unknown loop ioctl cmd, fd(%d) "
-			       "cmd(%08x) arg(%08lx)\n",
-			       __FUNCTION__, fd, cmd, arg);
-	}
-	}
-	return err;
-}
-
 #ifdef CONFIG_VT
 
 static int vt_check(struct file *file)
@@ -2808,8 +2742,6 @@ HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans)
 HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans)
 HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans)
 HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans)
-HANDLE_IOCTL(LOOP_SET_STATUS, loop_status)
-HANDLE_IOCTL(LOOP_GET_STATUS, loop_status)
 #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
 HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
 #ifdef CONFIG_VT
diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h
index bea0255196c4..98d40e08ba6e 100644
--- a/include/linux/compat_ioctl.h
+++ b/include/linux/compat_ioctl.h
@@ -395,12 +395,6 @@ COMPATIBLE_IOCTL(DVD_WRITE_STRUCT)
 COMPATIBLE_IOCTL(DVD_AUTH)
 /* pktcdvd */
 COMPATIBLE_IOCTL(PACKET_CTRL_CMD)
-/* Big L */
-ULONG_IOCTL(LOOP_SET_FD)
-ULONG_IOCTL(LOOP_CHANGE_FD)
-COMPATIBLE_IOCTL(LOOP_CLR_FD)
-COMPATIBLE_IOCTL(LOOP_GET_STATUS64)
-COMPATIBLE_IOCTL(LOOP_SET_STATUS64)
 /* Big A */
 /* sparc only */
 /* Big Q for sound/OSS */
-- 
cgit v1.2.3


From 36695673b012096228ebdc1b39a6a5850daa474e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 29 Aug 2006 19:06:16 +0100
Subject: [PATCH] BLOCK: Move common FS-specific ioctls to linux/fs.h [try #6]

Move common FS-specific ioctls from linux/ext2_fs.h to linux/fs.h as FS_IOC_*
and FS_IOC32_* and have the users of them use those as a base.

Also move the GETFLAGS/SETFLAGS flags to linux/fs.h as FS_*_FL macros, and then
have the other users use them as a base.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/cifs/ioctl.c             |  7 +++--
 fs/compat_ioctl.c           | 15 -----------
 fs/hfsplus/hfsplus_fs.h     |  8 ++----
 fs/hfsplus/ioctl.c          | 17 ++++++------
 fs/jfs/ioctl.c              | 15 +++++------
 include/linux/ext2_fs.h     | 64 +++++++++++++++++++++++++--------------------
 include/linux/ext3_fs.h     | 20 +++++++++++---
 include/linux/fs.h          | 39 +++++++++++++++++++++++++++
 include/linux/reiserfs_fs.h | 28 +++++++++-----------
 9 files changed, 124 insertions(+), 89 deletions(-)

(limited to 'include/linux')

diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index b0ea6687ab55..e34c7db00f6f 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -22,7 +22,6 @@
  */
 
 #include <linux/fs.h>
-#include <linux/ext2_fs.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
 #include "cifsproto.h"
@@ -74,7 +73,7 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
 			}
 			break;
 #ifdef CONFIG_CIFS_POSIX
-		case EXT2_IOC_GETFLAGS:
+		case FS_IOC_GETFLAGS:
 			if(CIFS_UNIX_EXTATTR_CAP & caps) {
 				if (pSMBFile == NULL)
 					break;
@@ -82,12 +81,12 @@ int cifs_ioctl (struct inode * inode, struct file * filep,
 					&ExtAttrBits, &ExtAttrMask);
 				if(rc == 0)
 					rc = put_user(ExtAttrBits &
-						EXT2_FL_USER_VISIBLE,
+						FS_FL_USER_VISIBLE,
 						(int __user *)arg);
 			}
 			break;
 
-		case EXT2_IOC_SETFLAGS:
+		case FS_IOC_SETFLAGS:
 			if(CIFS_UNIX_EXTATTR_CAP & caps) {
 				if(get_user(ExtAttrBits,(int __user *)arg)) {
 					rc = -EFAULT;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 3b0cf7fbd95a..bd9c4f49d4e5 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -123,21 +123,6 @@
 #include <linux/dvb/video.h>
 #include <linux/lp.h>
 
-/* Aiee. Someone does not find a difference between int and long */
-#define EXT2_IOC32_GETFLAGS               _IOR('f', 1, int)
-#define EXT2_IOC32_SETFLAGS               _IOW('f', 2, int)
-#define EXT3_IOC32_GETVERSION             _IOR('f', 3, int)
-#define EXT3_IOC32_SETVERSION             _IOW('f', 4, int)
-#define EXT3_IOC32_GETRSVSZ               _IOR('f', 5, int)
-#define EXT3_IOC32_SETRSVSZ               _IOW('f', 6, int)
-#define EXT3_IOC32_GROUP_EXTEND           _IOW('f', 7, unsigned int)
-#ifdef CONFIG_JBD_DEBUG
-#define EXT3_IOC32_WAIT_FOR_READONLY      _IOR('f', 99, int)
-#endif
-
-#define EXT2_IOC32_GETVERSION             _IOR('v', 1, int)
-#define EXT2_IOC32_SETVERSION             _IOW('v', 2, int)
-
 static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd,
 			      unsigned long arg, struct file *f)
 {
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 8a1ca5ef7ada..3915635b4470 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -246,12 +246,8 @@ struct hfsplus_readdir_data {
 
 /* ext2 ioctls (EXT2_IOC_GETFLAGS and EXT2_IOC_SETFLAGS) to support
  * chattr/lsattr */
-#define HFSPLUS_IOC_EXT2_GETFLAGS	_IOR('f', 1, long)
-#define HFSPLUS_IOC_EXT2_SETFLAGS	_IOW('f', 2, long)
-
-#define EXT2_FLAG_IMMUTABLE		0x00000010 /* Immutable file */
-#define EXT2_FLAG_APPEND		0x00000020 /* writes to file may only append */
-#define EXT2_FLAG_NODUMP		0x00000040 /* do not dump file */
+#define HFSPLUS_IOC_EXT2_GETFLAGS	FS_IOC_GETFLAGS
+#define HFSPLUS_IOC_EXT2_SETFLAGS	FS_IOC_SETFLAGS
 
 
 /*
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 13cf848ac833..79fd10402ea3 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -28,11 +28,11 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 	case HFSPLUS_IOC_EXT2_GETFLAGS:
 		flags = 0;
 		if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE)
-			flags |= EXT2_FLAG_IMMUTABLE; /* EXT2_IMMUTABLE_FL */
+			flags |= FS_IMMUTABLE_FL; /* EXT2_IMMUTABLE_FL */
 		if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND)
-			flags |= EXT2_FLAG_APPEND; /* EXT2_APPEND_FL */
+			flags |= FS_APPEND_FL; /* EXT2_APPEND_FL */
 		if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP)
-			flags |= EXT2_FLAG_NODUMP; /* EXT2_NODUMP_FL */
+			flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */
 		return put_user(flags, (int __user *)arg);
 	case HFSPLUS_IOC_EXT2_SETFLAGS: {
 		if (IS_RDONLY(inode))
@@ -44,32 +44,31 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 		if (get_user(flags, (int __user *)arg))
 			return -EFAULT;
 
-		if (flags & (EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND) ||
+		if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) ||
 		    HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) {
 			if (!capable(CAP_LINUX_IMMUTABLE))
 				return -EPERM;
 		}
 
 		/* don't silently ignore unsupported ext2 flags */
-		if (flags & ~(EXT2_FLAG_IMMUTABLE|EXT2_FLAG_APPEND|
-			      EXT2_FLAG_NODUMP))
+		if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL))
 			return -EOPNOTSUPP;
 
-		if (flags & EXT2_FLAG_IMMUTABLE) { /* EXT2_IMMUTABLE_FL */
+		if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */
 			inode->i_flags |= S_IMMUTABLE;
 			HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE;
 		} else {
 			inode->i_flags &= ~S_IMMUTABLE;
 			HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE;
 		}
-		if (flags & EXT2_FLAG_APPEND) { /* EXT2_APPEND_FL */
+		if (flags & FS_APPEND_FL) { /* EXT2_APPEND_FL */
 			inode->i_flags |= S_APPEND;
 			HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND;
 		} else {
 			inode->i_flags &= ~S_APPEND;
 			HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND;
 		}
-		if (flags & EXT2_FLAG_NODUMP) /* EXT2_NODUMP_FL */
+		if (flags & FS_NODUMP_FL) /* EXT2_NODUMP_FL */
 			HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP;
 		else
 			HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP;
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 67b3774820eb..37db52488262 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -6,7 +6,6 @@
  */
 
 #include <linux/fs.h>
-#include <linux/ext2_fs.h>
 #include <linux/ctype.h>
 #include <linux/capability.h>
 #include <linux/time.h>
@@ -22,13 +21,13 @@ static struct {
 	long jfs_flag;
 	long ext2_flag;
 } jfs_map[] = {
-	{JFS_NOATIME_FL, EXT2_NOATIME_FL},
-	{JFS_DIRSYNC_FL, EXT2_DIRSYNC_FL},
-	{JFS_SYNC_FL, EXT2_SYNC_FL},
-	{JFS_SECRM_FL, EXT2_SECRM_FL},
-	{JFS_UNRM_FL, EXT2_UNRM_FL},
-	{JFS_APPEND_FL, EXT2_APPEND_FL},
-	{JFS_IMMUTABLE_FL, EXT2_IMMUTABLE_FL},
+	{JFS_NOATIME_FL,	FS_NOATIME_FL},
+	{JFS_DIRSYNC_FL,	FS_DIRSYNC_FL},
+	{JFS_SYNC_FL,		FS_SYNC_FL},
+	{JFS_SECRM_FL,		FS_SECRM_FL},
+	{JFS_UNRM_FL,		FS_UNRM_FL},
+	{JFS_APPEND_FL,		FS_APPEND_FL},
+	{JFS_IMMUTABLE_FL,	FS_IMMUTABLE_FL},
 	{0, 0},
 };
 
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 33a1aa107329..153d755376a4 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -165,41 +165,49 @@ struct ext2_group_desc
 #define	EXT2_N_BLOCKS			(EXT2_TIND_BLOCK + 1)
 
 /*
- * Inode flags
+ * Inode flags (GETFLAGS/SETFLAGS)
  */
-#define	EXT2_SECRM_FL			0x00000001 /* Secure deletion */
-#define	EXT2_UNRM_FL			0x00000002 /* Undelete */
-#define	EXT2_COMPR_FL			0x00000004 /* Compress file */
-#define EXT2_SYNC_FL			0x00000008 /* Synchronous updates */
-#define EXT2_IMMUTABLE_FL		0x00000010 /* Immutable file */
-#define EXT2_APPEND_FL			0x00000020 /* writes to file may only append */
-#define EXT2_NODUMP_FL			0x00000040 /* do not dump file */
-#define EXT2_NOATIME_FL			0x00000080 /* do not update atime */
+#define	EXT2_SECRM_FL			FS_SECRM_FL	/* Secure deletion */
+#define	EXT2_UNRM_FL			FS_UNRM_FL	/* Undelete */
+#define	EXT2_COMPR_FL			FS_COMPR_FL	/* Compress file */
+#define EXT2_SYNC_FL			FS_SYNC_FL	/* Synchronous updates */
+#define EXT2_IMMUTABLE_FL		FS_IMMUTABLE_FL	/* Immutable file */
+#define EXT2_APPEND_FL			FS_APPEND_FL	/* writes to file may only append */
+#define EXT2_NODUMP_FL			FS_NODUMP_FL	/* do not dump file */
+#define EXT2_NOATIME_FL			FS_NOATIME_FL	/* do not update atime */
 /* Reserved for compression usage... */
-#define EXT2_DIRTY_FL			0x00000100
-#define EXT2_COMPRBLK_FL		0x00000200 /* One or more compressed clusters */
-#define EXT2_NOCOMP_FL			0x00000400 /* Don't compress */
-#define EXT2_ECOMPR_FL			0x00000800 /* Compression error */
+#define EXT2_DIRTY_FL			FS_DIRTY_FL
+#define EXT2_COMPRBLK_FL		FS_COMPRBLK_FL	/* One or more compressed clusters */
+#define EXT2_NOCOMP_FL			FS_NOCOMP_FL	/* Don't compress */
+#define EXT2_ECOMPR_FL			FS_ECOMPR_FL	/* Compression error */
 /* End compression flags --- maybe not all used */	
-#define EXT2_BTREE_FL			0x00001000 /* btree format dir */
-#define EXT2_INDEX_FL			0x00001000 /* hash-indexed directory */
-#define EXT2_IMAGIC_FL			0x00002000 /* AFS directory */
-#define EXT2_JOURNAL_DATA_FL		0x00004000 /* Reserved for ext3 */
-#define EXT2_NOTAIL_FL			0x00008000 /* file tail should not be merged */
-#define EXT2_DIRSYNC_FL			0x00010000 /* dirsync behaviour (directories only) */
-#define EXT2_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
-#define EXT2_RESERVED_FL		0x80000000 /* reserved for ext2 lib */
-
-#define EXT2_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
-#define EXT2_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */
+#define EXT2_BTREE_FL			FS_BTREE_FL	/* btree format dir */
+#define EXT2_INDEX_FL			FS_INDEX_FL	/* hash-indexed directory */
+#define EXT2_IMAGIC_FL			FS_IMAGIC_FL	/* AFS directory */
+#define EXT2_JOURNAL_DATA_FL		FS_JOURNAL_DATA_FL /* Reserved for ext3 */
+#define EXT2_NOTAIL_FL			FS_NOTAIL_FL	/* file tail should not be merged */
+#define EXT2_DIRSYNC_FL			FS_DIRSYNC_FL	/* dirsync behaviour (directories only) */
+#define EXT2_TOPDIR_FL			FS_TOPDIR_FL	/* Top of directory hierarchies*/
+#define EXT2_RESERVED_FL		FS_RESERVED_FL	/* reserved for ext2 lib */
+
+#define EXT2_FL_USER_VISIBLE		FS_FL_USER_VISIBLE	/* User visible flags */
+#define EXT2_FL_USER_MODIFIABLE		FS_FL_USER_MODIFIABLE	/* User modifiable flags */
 
 /*
  * ioctl commands
  */
-#define	EXT2_IOC_GETFLAGS		_IOR('f', 1, long)
-#define	EXT2_IOC_SETFLAGS		_IOW('f', 2, long)
-#define	EXT2_IOC_GETVERSION		_IOR('v', 1, long)
-#define	EXT2_IOC_SETVERSION		_IOW('v', 2, long)
+#define	EXT2_IOC_GETFLAGS		FS_IOC_GETFLAGS
+#define	EXT2_IOC_SETFLAGS		FS_IOC_SETFLAGS
+#define	EXT2_IOC_GETVERSION		FS_IOC_GETVERSION
+#define	EXT2_IOC_SETVERSION		FS_IOC_SETVERSION
+
+/*
+ * ioctl commands in 32 bit emulation
+ */
+#define EXT2_IOC32_GETFLAGS		FS_IOC32_GETFLAGS
+#define EXT2_IOC32_SETFLAGS		FS_IOC32_SETFLAGS
+#define EXT2_IOC32_GETVERSION		FS_IOC32_GETVERSION
+#define EXT2_IOC32_SETVERSION		FS_IOC32_SETVERSION
 
 /*
  * Structure of an inode on the disk
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index cc08f56750da..a7a01ff44669 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -216,20 +216,32 @@ struct ext3_new_group_data {
 /*
  * ioctl commands
  */
-#define	EXT3_IOC_GETFLAGS		_IOR('f', 1, long)
-#define	EXT3_IOC_SETFLAGS		_IOW('f', 2, long)
+#define	EXT3_IOC_GETFLAGS		FS_IOC_GETFLAGS
+#define	EXT3_IOC_SETFLAGS		FS_IOC_SETFLAGS
 #define	EXT3_IOC_GETVERSION		_IOR('f', 3, long)
 #define	EXT3_IOC_SETVERSION		_IOW('f', 4, long)
 #define EXT3_IOC_GROUP_EXTEND		_IOW('f', 7, unsigned long)
 #define EXT3_IOC_GROUP_ADD		_IOW('f', 8,struct ext3_new_group_input)
-#define	EXT3_IOC_GETVERSION_OLD		_IOR('v', 1, long)
-#define	EXT3_IOC_SETVERSION_OLD		_IOW('v', 2, long)
+#define	EXT3_IOC_GETVERSION_OLD		FS_IOC_GETVERSION
+#define	EXT3_IOC_SETVERSION_OLD		FS_IOC_SETVERSION
 #ifdef CONFIG_JBD_DEBUG
 #define EXT3_IOC_WAIT_FOR_READONLY	_IOR('f', 99, long)
 #endif
 #define EXT3_IOC_GETRSVSZ		_IOR('f', 5, long)
 #define EXT3_IOC_SETRSVSZ		_IOW('f', 6, long)
 
+/*
+ * ioctl commands in 32 bit emulation
+ */
+#define EXT3_IOC32_GETVERSION		_IOR('f', 3, int)
+#define EXT3_IOC32_SETVERSION		_IOW('f', 4, int)
+#define EXT3_IOC32_GETRSVSZ		_IOR('f', 5, int)
+#define EXT3_IOC32_SETRSVSZ		_IOW('f', 6, int)
+#define EXT3_IOC32_GROUP_EXTEND		_IOW('f', 7, unsigned int)
+#ifdef CONFIG_JBD_DEBUG
+#define EXT3_IOC32_WAIT_FOR_READONLY	_IOR('f', 99, int)
+#endif
+
 /*
  *  Mount options
  */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1728142ec4b6..b73a47582dbe 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -217,6 +217,45 @@ extern int dir_notify_enable;
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
 #define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
 
+#define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
+#define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
+#define	FS_IOC_GETVERSION		_IOR('v', 1, long)
+#define	FS_IOC_SETVERSION		_IOW('v', 2, long)
+#define FS_IOC32_GETFLAGS		_IOR('f', 1, int)
+#define FS_IOC32_SETFLAGS		_IOW('f', 2, int)
+#define FS_IOC32_GETVERSION		_IOR('v', 1, int)
+#define FS_IOC32_SETVERSION		_IOW('v', 2, int)
+
+/*
+ * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+ */
+#define	FS_SECRM_FL			0x00000001 /* Secure deletion */
+#define	FS_UNRM_FL			0x00000002 /* Undelete */
+#define	FS_COMPR_FL			0x00000004 /* Compress file */
+#define FS_SYNC_FL			0x00000008 /* Synchronous updates */
+#define FS_IMMUTABLE_FL			0x00000010 /* Immutable file */
+#define FS_APPEND_FL			0x00000020 /* writes to file may only append */
+#define FS_NODUMP_FL			0x00000040 /* do not dump file */
+#define FS_NOATIME_FL			0x00000080 /* do not update atime */
+/* Reserved for compression usage... */
+#define FS_DIRTY_FL			0x00000100
+#define FS_COMPRBLK_FL			0x00000200 /* One or more compressed clusters */
+#define FS_NOCOMP_FL			0x00000400 /* Don't compress */
+#define FS_ECOMPR_FL			0x00000800 /* Compression error */
+/* End compression flags --- maybe not all used */
+#define FS_BTREE_FL			0x00001000 /* btree format dir */
+#define FS_INDEX_FL			0x00001000 /* hash-indexed directory */
+#define FS_IMAGIC_FL			0x00002000 /* AFS directory */
+#define FS_JOURNAL_DATA_FL		0x00004000 /* Reserved for ext3 */
+#define FS_NOTAIL_FL			0x00008000 /* file tail should not be merged */
+#define FS_DIRSYNC_FL			0x00010000 /* dirsync behaviour (directories only) */
+#define FS_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
+#define FS_RESERVED_FL			0x80000000 /* reserved for ext2 lib */
+
+#define FS_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
+#define FS_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */
+
+
 #define SYNC_FILE_RANGE_WAIT_BEFORE	1
 #define SYNC_FILE_RANGE_WRITE		2
 #define SYNC_FILE_RANGE_WAIT_AFTER	4
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 28493ffaafe7..0100d6d1d84c 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -807,21 +807,19 @@ struct stat_data_v1 {
 #define set_sd_v1_first_direct_byte(sdp,v) \
                                 ((sdp)->sd_first_direct_byte = cpu_to_le32(v))
 
-#include <linux/ext2_fs.h>
-
 /* inode flags stored in sd_attrs (nee sd_reserved) */
 
 /* we want common flags to have the same values as in ext2,
    so chattr(1) will work without problems */
-#define REISERFS_IMMUTABLE_FL EXT2_IMMUTABLE_FL
-#define REISERFS_APPEND_FL    EXT2_APPEND_FL
-#define REISERFS_SYNC_FL      EXT2_SYNC_FL
-#define REISERFS_NOATIME_FL   EXT2_NOATIME_FL
-#define REISERFS_NODUMP_FL    EXT2_NODUMP_FL
-#define REISERFS_SECRM_FL     EXT2_SECRM_FL
-#define REISERFS_UNRM_FL      EXT2_UNRM_FL
-#define REISERFS_COMPR_FL     EXT2_COMPR_FL
-#define REISERFS_NOTAIL_FL    EXT2_NOTAIL_FL
+#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL
+#define REISERFS_APPEND_FL    FS_APPEND_FL
+#define REISERFS_SYNC_FL      FS_SYNC_FL
+#define REISERFS_NOATIME_FL   FS_NOATIME_FL
+#define REISERFS_NODUMP_FL    FS_NODUMP_FL
+#define REISERFS_SECRM_FL     FS_SECRM_FL
+#define REISERFS_UNRM_FL      FS_UNRM_FL
+#define REISERFS_COMPR_FL     FS_COMPR_FL
+#define REISERFS_NOTAIL_FL    FS_NOTAIL_FL
 
 /* persistent flags that file inherits from the parent directory */
 #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL |	\
@@ -2168,10 +2166,10 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp,
 #define REISERFS_IOC_UNPACK		_IOW(0xCD,1,long)
 /* define following flags to be the same as in ext2, so that chattr(1),
    lsattr(1) will work with us. */
-#define REISERFS_IOC_GETFLAGS		EXT2_IOC_GETFLAGS
-#define REISERFS_IOC_SETFLAGS		EXT2_IOC_SETFLAGS
-#define REISERFS_IOC_GETVERSION		EXT2_IOC_GETVERSION
-#define REISERFS_IOC_SETVERSION		EXT2_IOC_SETVERSION
+#define REISERFS_IOC_GETFLAGS		FS_IOC_GETFLAGS
+#define REISERFS_IOC_SETFLAGS		FS_IOC_SETFLAGS
+#define REISERFS_IOC_GETVERSION		FS_IOC_GETVERSION
+#define REISERFS_IOC_SETVERSION		FS_IOC_SETVERSION
 
 /* Locking primitives */
 /* Right now we are still falling back to (un)lock_kernel, but eventually that
-- 
cgit v1.2.3


From 52b499c438ff60991eb3855ca090782569b3e8cf Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 29 Aug 2006 19:06:18 +0100
Subject: [PATCH] BLOCK: Move the ReiserFS device ioctl compat stuff to the
 ReiserFS driver [try #6]

Move the ReiserFS device ioctl compat stuff from fs/compat_ioctl.c to the
ReiserFS driver so that the ReiserFS header file doesn't need to be included.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/compat_ioctl.c           | 12 ------------
 fs/reiserfs/dir.c           |  3 +++
 fs/reiserfs/file.c          |  4 ++++
 fs/reiserfs/ioctl.c         | 35 +++++++++++++++++++++++++++++++++++
 include/linux/reiserfs_fs.h |  9 +++++++++
 5 files changed, 51 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index bd9c4f49d4e5..0346f2ab57c4 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -59,7 +59,6 @@
 #include <linux/pci.h>
 #include <linux/module.h>
 #include <linux/serial.h>
-#include <linux/reiserfs_fs.h>
 #include <linux/if_tun.h>
 #include <linux/ctype.h>
 #include <linux/ioctl32.h>
@@ -2014,16 +2013,6 @@ static int vfat_ioctl32(unsigned fd, unsigned cmd, unsigned long arg)
 	return ret;
 }
 
-#define REISERFS_IOC_UNPACK32               _IOW(0xCD,1,int)
-
-static int reiserfs_ioctl32(unsigned fd, unsigned cmd, unsigned long ptr)
-{
-        if (cmd == REISERFS_IOC_UNPACK32)
-                cmd = REISERFS_IOC_UNPACK;
-
-        return sys_ioctl(fd,cmd,ptr);
-}
-
 struct raw32_config_request
 {
         compat_int_t    raw_minor;
@@ -2784,7 +2773,6 @@ HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64)
 /* vfat */
 HANDLE_IOCTL(VFAT_IOCTL_READDIR_BOTH32, vfat_ioctl32)
 HANDLE_IOCTL(VFAT_IOCTL_READDIR_SHORT32, vfat_ioctl32)
-HANDLE_IOCTL(REISERFS_IOC_UNPACK32, reiserfs_ioctl32)
 /* Raw devices */
 HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
 HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 9aabcc0ccd2d..657050ad7430 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -22,6 +22,9 @@ const struct file_operations reiserfs_dir_operations = {
 	.readdir = reiserfs_readdir,
 	.fsync = reiserfs_dir_fsync,
 	.ioctl = reiserfs_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = reiserfs_compat_ioctl,
+#endif
 };
 
 static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 1cfbe857ba27..3e08f7161a3d 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -2,6 +2,7 @@
  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
  */
 
+#include <linux/config.h>
 #include <linux/time.h>
 #include <linux/reiserfs_fs.h>
 #include <linux/reiserfs_acl.h>
@@ -1568,6 +1569,9 @@ const struct file_operations reiserfs_file_operations = {
 	.read = generic_file_read,
 	.write = reiserfs_file_write,
 	.ioctl = reiserfs_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = reiserfs_compat_ioctl,
+#endif
 	.mmap = generic_file_mmap,
 	.release = reiserfs_file_release,
 	.fsync = reiserfs_sync_file,
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index a986b5e1e288..9c57578cb831 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -9,6 +9,7 @@
 #include <asm/uaccess.h>
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
+#include <linux/compat.h>
 
 static int reiserfs_unpack(struct inode *inode, struct file *filp);
 
@@ -94,6 +95,40 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 	}
 }
 
+#ifdef CONFIG_COMPAT
+long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
+				unsigned long arg)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	int ret;
+
+	/* These are just misnamed, they actually get/put from/to user an int */
+	switch (cmd) {
+	case REISERFS_IOC32_UNPACK:
+		cmd = REISERFS_IOC_UNPACK;
+		break;
+	case REISERFS_IOC32_GETFLAGS:
+		cmd = REISERFS_IOC_GETFLAGS;
+		break;
+	case REISERFS_IOC32_SETFLAGS:
+		cmd = REISERFS_IOC_SETFLAGS;
+		break;
+	case REISERFS_IOC32_GETVERSION:
+		cmd = REISERFS_IOC_GETVERSION;
+		break;
+	case REISERFS_IOC32_SETVERSION:
+		cmd = REISERFS_IOC_SETVERSION;
+		break;
+	default:
+		return -ENOIOCTLCMD;
+	}
+	lock_kernel();
+	ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
+	unlock_kernel();
+	return ret;
+}
+#endif
+
 /*
 ** reiserfs_unpack
 ** Function try to convert tail from direct item into indirect.
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 0100d6d1d84c..9c63abffd7b2 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -2161,6 +2161,8 @@ __u32 r5_hash(const signed char *msg, int len);
 /* prototypes from ioctl.c */
 int reiserfs_ioctl(struct inode *inode, struct file *filp,
 		   unsigned int cmd, unsigned long arg);
+long reiserfs_compat_ioctl(struct file *filp,
+		   unsigned int cmd, unsigned long arg);
 
 /* ioctl's command */
 #define REISERFS_IOC_UNPACK		_IOW(0xCD,1,long)
@@ -2171,6 +2173,13 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp,
 #define REISERFS_IOC_GETVERSION		FS_IOC_GETVERSION
 #define REISERFS_IOC_SETVERSION		FS_IOC_SETVERSION
 
+/* the 32 bit compat definitions with int argument */
+#define REISERFS_IOC32_UNPACK		_IOW(0xCD, 1, int)
+#define REISERFS_IOC32_GETFLAGS		FS_IOC32_GETFLAGS
+#define REISERFS_IOC32_SETFLAGS		FS_IOC32_SETFLAGS
+#define REISERFS_IOC32_GETVERSION	FS_IOC32_GETVERSION
+#define REISERFS_IOC32_SETVERSION	FS_IOC32_SETVERSION
+
 /* Locking primitives */
 /* Right now we are still falling back to (un)lock_kernel, but eventually that
    would evolve into real per-fs locks */
-- 
cgit v1.2.3


From 52a700c5675f399c07e6e57328291e57f13ef3bb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 29 Aug 2006 19:06:23 +0100
Subject: [PATCH] BLOCK: Move the Ext3 device ioctl compat stuff to the Ext3
 driver [try #6]

Move the Ext3 device ioctl compat stuff from fs/compat_ioctl.c to the Ext3
driver so that the Ext3 header file doesn't need to be included.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/compat_ioctl.c       | 27 ------------------------
 fs/ext3/dir.c           |  3 +++
 fs/ext3/file.c          |  3 +++
 fs/ext3/ioctl.c         | 55 ++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/ext3_fs.h |  6 ++++++
 5 files changed, 66 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 3594668559af..e5eb0f10f05a 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -45,8 +45,6 @@
 #include <linux/tty.h>
 #include <linux/vt_kern.h>
 #include <linux/fb.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
 #include <linux/videodev.h>
 #include <linux/netdevice.h>
 #include <linux/raw.h>
@@ -158,22 +156,6 @@ static int rw_long(unsigned int fd, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
-static int do_ext3_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-	/* These are just misnamed, they actually get/put from/to user an int */
-	switch (cmd) {
-	case EXT3_IOC32_GETVERSION: cmd = EXT3_IOC_GETVERSION; break;
-	case EXT3_IOC32_SETVERSION: cmd = EXT3_IOC_SETVERSION; break;
-	case EXT3_IOC32_GETRSVSZ: cmd = EXT3_IOC_GETRSVSZ; break;
-	case EXT3_IOC32_SETRSVSZ: cmd = EXT3_IOC_SETRSVSZ; break;
-	case EXT3_IOC32_GROUP_EXTEND: cmd = EXT3_IOC_GROUP_EXTEND; break;
-#ifdef CONFIG_JBD_DEBUG
-	case EXT3_IOC32_WAIT_FOR_READONLY: cmd = EXT3_IOC_WAIT_FOR_READONLY; break;
-#endif
-	}
-	return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
-}
-
 struct compat_video_event {
 	int32_t		type;
 	compat_time_t	timestamp;
@@ -2712,15 +2694,6 @@ HANDLE_IOCTL(PIO_UNIMAP, do_unimap_ioctl)
 HANDLE_IOCTL(GIO_UNIMAP, do_unimap_ioctl)
 HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl)
 #endif
-HANDLE_IOCTL(EXT3_IOC32_GETVERSION, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_SETVERSION, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_GETRSVSZ, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_SETRSVSZ, do_ext3_ioctl)
-HANDLE_IOCTL(EXT3_IOC32_GROUP_EXTEND, do_ext3_ioctl)
-COMPATIBLE_IOCTL(EXT3_IOC_GROUP_ADD)
-#ifdef CONFIG_JBD_DEBUG
-HANDLE_IOCTL(EXT3_IOC32_WAIT_FOR_READONLY, do_ext3_ioctl)
-#endif
 /* One SMB ioctl needs translations. */
 #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
 HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid)
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 429acbb4e064..d0b54f30b914 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -44,6 +44,9 @@ const struct file_operations ext3_dir_operations = {
 	.read		= generic_read_dir,
 	.readdir	= ext3_readdir,		/* we take BKL. needed?*/
 	.ioctl		= ext3_ioctl,		/* BKL held */
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= ext3_compat_ioctl,
+#endif
 	.fsync		= ext3_sync_file,	/* BKL held */
 #ifdef CONFIG_EXT3_INDEX
 	.release	= ext3_release_dir,
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 994efd189f4e..74ff20f9d09b 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -114,6 +114,9 @@ const struct file_operations ext3_file_operations = {
 	.readv		= generic_file_readv,
 	.writev		= generic_file_writev,
 	.ioctl		= ext3_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= ext3_compat_ioctl,
+#endif
 	.mmap		= generic_file_mmap,
 	.open		= generic_file_open,
 	.release	= ext3_release_file,
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 3a6b012d120c..12daa6869572 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -13,9 +13,10 @@
 #include <linux/ext3_fs.h>
 #include <linux/ext3_jbd.h>
 #include <linux/time.h>
+#include <linux/compat.h>
+#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 
-
 int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
 		unsigned long arg)
 {
@@ -252,3 +253,55 @@ flags_err:
 		return -ENOTTY;
 	}
 }
+
+#ifdef CONFIG_COMPAT
+long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	int ret;
+
+	/* These are just misnamed, they actually get/put from/to user an int */
+	switch (cmd) {
+	case EXT3_IOC32_GETFLAGS:
+		cmd = EXT3_IOC_GETFLAGS;
+		break;
+	case EXT3_IOC32_SETFLAGS:
+		cmd = EXT3_IOC_SETFLAGS;
+		break;
+	case EXT3_IOC32_GETVERSION:
+		cmd = EXT3_IOC_GETVERSION;
+		break;
+	case EXT3_IOC32_SETVERSION:
+		cmd = EXT3_IOC_SETVERSION;
+		break;
+	case EXT3_IOC32_GROUP_EXTEND:
+		cmd = EXT3_IOC_GROUP_EXTEND;
+		break;
+	case EXT3_IOC32_GETVERSION_OLD:
+		cmd = EXT3_IOC_GETVERSION_OLD;
+		break;
+	case EXT3_IOC32_SETVERSION_OLD:
+		cmd = EXT3_IOC_SETVERSION_OLD;
+		break;
+#ifdef CONFIG_JBD_DEBUG
+	case EXT3_IOC32_WAIT_FOR_READONLY:
+		cmd = EXT3_IOC_WAIT_FOR_READONLY;
+		break;
+#endif
+	case EXT3_IOC32_GETRSVSZ:
+		cmd = EXT3_IOC_GETRSVSZ;
+		break;
+	case EXT3_IOC32_SETRSVSZ:
+		cmd = EXT3_IOC_SETRSVSZ;
+		break;
+	case EXT3_IOC_GROUP_ADD:
+		break;
+	default:
+		return -ENOIOCTLCMD;
+	}
+	lock_kernel();
+	ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
+	unlock_kernel();
+	return ret;
+}
+#endif
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index a7a01ff44669..11cca1bdc0c7 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -233,6 +233,8 @@ struct ext3_new_group_data {
 /*
  * ioctl commands in 32 bit emulation
  */
+#define EXT3_IOC32_GETFLAGS		FS_IOC32_GETFLAGS
+#define EXT3_IOC32_SETFLAGS		FS_IOC32_SETFLAGS
 #define EXT3_IOC32_GETVERSION		_IOR('f', 3, int)
 #define EXT3_IOC32_SETVERSION		_IOW('f', 4, int)
 #define EXT3_IOC32_GETRSVSZ		_IOR('f', 5, int)
@@ -241,6 +243,9 @@ struct ext3_new_group_data {
 #ifdef CONFIG_JBD_DEBUG
 #define EXT3_IOC32_WAIT_FOR_READONLY	_IOR('f', 99, int)
 #endif
+#define EXT3_IOC32_GETVERSION_OLD	FS_IOC32_GETVERSION
+#define EXT3_IOC32_SETVERSION_OLD	FS_IOC32_SETVERSION
+
 
 /*
  *  Mount options
@@ -824,6 +829,7 @@ extern void ext3_set_aops(struct inode *inode);
 /* ioctl.c */
 extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
 		       unsigned long);
+extern long ext3_compat_ioctl (struct file *, unsigned int, unsigned long);
 
 /* namei.c */
 extern int ext3_orphan_add(handle_t *, struct inode *);
-- 
cgit v1.2.3


From 9361401eb7619c033e2394e4f9f6d410d6719ac7 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 30 Sep 2006 20:45:40 +0200
Subject: [PATCH] BLOCK: Make it possible to disable the block layer [try #6]

Make it possible to disable the block layer.  Not all embedded devices require
it, some can make do with just JFFS2, NFS, ramfs, etc - none of which require
the block layer to be present.

This patch does the following:

 (*) Introduces CONFIG_BLOCK to disable the block layer, buffering and blockdev
     support.

 (*) Adds dependencies on CONFIG_BLOCK to any configuration item that controls
     an item that uses the block layer.  This includes:

     (*) Block I/O tracing.

     (*) Disk partition code.

     (*) All filesystems that are block based, eg: Ext3, ReiserFS, ISOFS.

     (*) The SCSI layer.  As far as I can tell, even SCSI chardevs use the
     	 block layer to do scheduling.  Some drivers that use SCSI facilities -
     	 such as USB storage - end up disabled indirectly from this.

     (*) Various block-based device drivers, such as IDE and the old CDROM
     	 drivers.

     (*) MTD blockdev handling and FTL.

     (*) JFFS - which uses set_bdev_super(), something it could avoid doing by
     	 taking a leaf out of JFFS2's book.

 (*) Makes most of the contents of linux/blkdev.h, linux/buffer_head.h and
     linux/elevator.h contingent on CONFIG_BLOCK being set.  sector_div() is,
     however, still used in places, and so is still available.

 (*) Also made contingent are the contents of linux/mpage.h, linux/genhd.h and
     parts of linux/fs.h.

 (*) Makes a number of files in fs/ contingent on CONFIG_BLOCK.

 (*) Makes mm/bounce.c (bounce buffering) contingent on CONFIG_BLOCK.

 (*) set_page_dirty() doesn't call __set_page_dirty_buffers() if CONFIG_BLOCK
     is not enabled.

 (*) fs/no-block.c is created to hold out-of-line stubs and things that are
     required when CONFIG_BLOCK is not set:

     (*) Default blockdev file operations (to give error ENODEV on opening).

 (*) Makes some /proc changes:

     (*) /proc/devices does not list any blockdevs.

     (*) /proc/diskstats and /proc/partitions are contingent on CONFIG_BLOCK.

 (*) Makes some compat ioctl handling contingent on CONFIG_BLOCK.

 (*) If CONFIG_BLOCK is not defined, makes sys_quotactl() return -ENODEV if
     given command other than Q_SYNC or if a special device is specified.

 (*) In init/do_mounts.c, no reference is made to the blockdev routines if
     CONFIG_BLOCK is not defined.  This does not prohibit NFS roots or JFFS2.

 (*) The bdflush, ioprio_set and ioprio_get syscalls can now be absent (return
     error ENOSYS by way of cond_syscall if so).

 (*) The seclvl_bd_claim() and seclvl_bd_release() security calls do nothing if
     CONFIG_BLOCK is not set, since they can't then happen.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig                | 20 ++++++++++++++++++
 block/Kconfig.iosched        |  3 +++
 block/Makefile               |  2 +-
 drivers/block/Kconfig        |  4 ++++
 drivers/cdrom/Kconfig        |  2 +-
 drivers/char/Kconfig         |  1 +
 drivers/char/random.c        |  4 ++++
 drivers/ide/Kconfig          |  4 ++++
 drivers/md/Kconfig           |  3 +++
 drivers/message/i2o/Kconfig  |  2 +-
 drivers/mmc/Kconfig          |  2 +-
 drivers/mmc/Makefile         |  3 ++-
 drivers/mtd/Kconfig          | 12 +++++------
 drivers/mtd/devices/Kconfig  |  2 +-
 drivers/s390/block/Kconfig   |  2 +-
 drivers/scsi/Kconfig         |  2 ++
 fs/Kconfig                   | 31 ++++++++++++++++++++-------
 fs/Makefile                  | 14 +++++++++----
 fs/compat_ioctl.c            | 18 ++++++++++++++++
 fs/internal.h                |  6 ++++++
 fs/no-block.c                | 22 +++++++++++++++++++
 fs/partitions/Makefile       |  2 +-
 fs/proc/proc_misc.c          | 11 +++++++++-
 fs/quota.c                   | 44 ++++++++++++++++++++++++++------------
 fs/super.c                   |  4 ++++
 fs/xfs/Kconfig               |  1 +
 include/linux/blkdev.h       | 50 +++++++++++++++++++++++++++++++-------------
 include/linux/buffer_head.h  | 16 ++++++++++++++
 include/linux/compat_ioctl.h |  2 ++
 include/linux/elevator.h     |  3 +++
 include/linux/fs.h           | 25 +++++++++++++++++++---
 include/linux/genhd.h        |  4 ++++
 include/linux/mpage.h        |  3 +++
 include/linux/raid/md.h      |  3 +++
 include/linux/raid/md_k.h    |  3 +++
 include/scsi/scsi_tcq.h      |  3 ++-
 init/Kconfig                 |  2 +-
 init/do_mounts.c             | 13 +++++++++++-
 kernel/sys_ni.c              |  5 +++++
 mm/Makefile                  |  2 +-
 mm/filemap.c                 |  4 ++++
 mm/migrate.c                 |  2 ++
 mm/page-writeback.c          |  8 ++++---
 mm/truncate.c                |  2 ++
 44 files changed, 308 insertions(+), 63 deletions(-)
 create mode 100644 fs/no-block.c

(limited to 'include/linux')

diff --git a/block/Kconfig b/block/Kconfig
index b6f5f0a79655..9af6c614dfde 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -1,6 +1,24 @@
 #
 # Block layer core configuration
 #
+config BLOCK
+       bool "Enable the block layer"
+       default y
+       help
+	 This permits the block layer to be removed from the kernel if it's not
+	 needed (on some embedded devices for example).  If this option is
+	 disabled, then blockdev files will become unusable and some
+	 filesystems (such as ext3) will become unavailable.
+
+	 This option will also disable SCSI character devices and USB storage
+	 since they make use of various block layer definitions and
+	 facilities.
+
+	 Say Y here unless you know you really don't want to mount disks and
+	 suchlike.
+
+if BLOCK
+
 #XXX - it makes sense to enable this only for 32-bit subarch's, not for x86_64
 #for instance.
 config LBD
@@ -33,4 +51,6 @@ config LSF
 
 	  If unsure, say Y.
 
+endif
+
 source block/Kconfig.iosched
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 48d090e266fc..903f0d3b6852 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -1,3 +1,4 @@
+if BLOCK
 
 menu "IO Schedulers"
 
@@ -67,3 +68,5 @@ config DEFAULT_IOSCHED
 	default "noop" if DEFAULT_NOOP
 
 endmenu
+
+endif
diff --git a/block/Makefile b/block/Makefile
index c05de0e0037f..4b84d0d5947b 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the kernel block layer
 #
 
-obj-y	:= elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o
+obj-$(CONFIG_BLOCK) := elevator.o ll_rw_blk.o ioctl.o genhd.o scsi_ioctl.o
 
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
 obj-$(CONFIG_IOSCHED_AS)	+= as-iosched.o
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index b5382cedf0c0..422e31d5f8e5 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -2,6 +2,8 @@
 # Block device driver configuration
 #
 
+if BLOCK
+
 menu "Block devices"
 
 config BLK_DEV_FD
@@ -468,3 +470,5 @@ config ATA_OVER_ETH
 	devices like the Coraid EtherDrive (R) Storage Blade.
 
 endmenu
+
+endif
diff --git a/drivers/cdrom/Kconfig b/drivers/cdrom/Kconfig
index ff5652d40619..4b12e9031fb3 100644
--- a/drivers/cdrom/Kconfig
+++ b/drivers/cdrom/Kconfig
@@ -3,7 +3,7 @@
 #
 
 menu "Old CD-ROM drivers (not SCSI, not IDE)"
-	depends on ISA
+	depends on ISA && BLOCK
 
 config CD_NO_IDESCSI
 	bool "Support non-SCSI/IDE/ATAPI CDROM drives"
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 4cc619edf424..bde1c665d9f4 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -1006,6 +1006,7 @@ config GPIO_VR41XX
 
 config RAW_DRIVER
 	tristate "RAW driver (/dev/raw/rawN) (OBSOLETE)"
+	depends on BLOCK
 	help
 	  The raw driver permits block devices to be bound to /dev/raw/rawN. 
 	  Once bound, I/O against /dev/raw/rawN uses efficient zero-copy I/O. 
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 4c3a5ca9d8f7..b430a12eb819 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -655,6 +655,7 @@ void add_interrupt_randomness(int irq)
 	add_timer_randomness(irq_timer_state[irq], 0x100 + irq);
 }
 
+#ifdef CONFIG_BLOCK
 void add_disk_randomness(struct gendisk *disk)
 {
 	if (!disk || !disk->random)
@@ -667,6 +668,7 @@ void add_disk_randomness(struct gendisk *disk)
 }
 
 EXPORT_SYMBOL(add_disk_randomness);
+#endif
 
 #define EXTRACT_SIZE 10
 
@@ -918,6 +920,7 @@ void rand_initialize_irq(int irq)
 	}
 }
 
+#ifdef CONFIG_BLOCK
 void rand_initialize_disk(struct gendisk *disk)
 {
 	struct timer_rand_state *state;
@@ -932,6 +935,7 @@ void rand_initialize_disk(struct gendisk *disk)
 		disk->random = state;
 	}
 }
+#endif
 
 static ssize_t
 random_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index b6fb167e20f6..69d627bd537a 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -4,6 +4,8 @@
 # Andre Hedrick <andre@linux-ide.org>
 #
 
+if BLOCK
+
 menu "ATA/ATAPI/MFM/RLL support"
 
 config IDE
@@ -1082,3 +1084,5 @@ config BLK_DEV_HD
 endif
 
 endmenu
+
+endif
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index bf869ed03eed..6dd31a291d84 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -2,6 +2,8 @@
 # Block device driver configuration
 #
 
+if BLOCK
+
 menu "Multi-device support (RAID and LVM)"
 
 config MD
@@ -251,3 +253,4 @@ config DM_MULTIPATH_EMC
 
 endmenu
 
+endif
diff --git a/drivers/message/i2o/Kconfig b/drivers/message/i2o/Kconfig
index fef677103880..6443392bffff 100644
--- a/drivers/message/i2o/Kconfig
+++ b/drivers/message/i2o/Kconfig
@@ -88,7 +88,7 @@ config I2O_BUS
 
 config I2O_BLOCK
 	tristate "I2O Block OSM"
-	depends on I2O
+	depends on I2O && BLOCK
 	---help---
 	  Include support for the I2O Block OSM. The Block OSM presents disk
 	  and other structured block devices to the operating system. If you
diff --git a/drivers/mmc/Kconfig b/drivers/mmc/Kconfig
index 45bcf098e762..f540bd88dc5a 100644
--- a/drivers/mmc/Kconfig
+++ b/drivers/mmc/Kconfig
@@ -21,7 +21,7 @@ config MMC_DEBUG
 
 config MMC_BLOCK
 	tristate "MMC block device driver"
-	depends on MMC
+	depends on MMC && BLOCK
 	default y
 	help
 	  Say Y here to enable the MMC block device driver support.
diff --git a/drivers/mmc/Makefile b/drivers/mmc/Makefile
index d2957e35cc6f..b1f6e03e7aa9 100644
--- a/drivers/mmc/Makefile
+++ b/drivers/mmc/Makefile
@@ -24,7 +24,8 @@ obj-$(CONFIG_MMC_AU1X)		+= au1xmmc.o
 obj-$(CONFIG_MMC_OMAP)		+= omap.o
 obj-$(CONFIG_MMC_AT91RM9200)	+= at91_mci.o
 
-mmc_core-y := mmc.o mmc_queue.o mmc_sysfs.o
+mmc_core-y := mmc.o mmc_sysfs.o
+mmc_core-$(CONFIG_BLOCK) += mmc_queue.o
 
 ifeq ($(CONFIG_MMC_DEBUG),y)
 EXTRA_CFLAGS += -DDEBUG
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index a03e862851db..a304b34c2632 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -166,7 +166,7 @@ config MTD_CHAR
 
 config MTD_BLOCK
 	tristate "Caching block device access to MTD devices"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  Although most flash chips have an erase size too large to be useful
 	  as block devices, it is possible to use MTD devices which are based
@@ -188,7 +188,7 @@ config MTD_BLOCK
 
 config MTD_BLOCK_RO
 	tristate "Readonly block device access to MTD devices"
-	depends on MTD_BLOCK!=y && MTD
+	depends on MTD_BLOCK!=y && MTD && BLOCK
 	help
 	  This allows you to mount read-only file systems (such as cramfs)
 	  from an MTD device, without the overhead (and danger) of the caching
@@ -199,7 +199,7 @@ config MTD_BLOCK_RO
 
 config FTL
 	tristate "FTL (Flash Translation Layer) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  This provides support for the original Flash Translation Layer which
 	  is part of the PCMCIA specification. It uses a kind of pseudo-
@@ -215,7 +215,7 @@ config FTL
 
 config NFTL
 	tristate "NFTL (NAND Flash Translation Layer) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  This provides support for the NAND Flash Translation Layer which is
 	  used on M-Systems' DiskOnChip devices. It uses a kind of pseudo-
@@ -238,7 +238,7 @@ config NFTL_RW
 
 config INFTL
 	tristate "INFTL (Inverse NAND Flash Translation Layer) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  This provides support for the Inverse NAND Flash Translation
 	  Layer which is used on M-Systems' newer DiskOnChip devices. It
@@ -255,7 +255,7 @@ config INFTL
 
 config RFD_FTL
         tristate "Resident Flash Disk (Flash Translation Layer) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	---help---
 	  This provides support for the flash translation layer known
 	  as the Resident Flash Disk (RFD), as used by the Embedded BIOS
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 16c02b5ccf7e..440f6851da69 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -136,7 +136,7 @@ config MTDRAM_ABS_POS
 
 config MTD_BLOCK2MTD
 	tristate "MTD using block device"
-	depends on MTD
+	depends on MTD && BLOCK
 	help
 	  This driver allows a block device to appear as an MTD. It would
 	  generally be used in the following cases:
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index 929d6fff6152..b250c5354503 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -1,4 +1,4 @@
-if S390
+if S390 && BLOCK
 
 comment "S/390 block device drivers"
 	depends on S390
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index c4dfcc91ddda..dab082002e6f 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -3,11 +3,13 @@ menu "SCSI device support"
 config RAID_ATTRS
 	tristate "RAID Transport Class"
 	default n
+	depends on BLOCK
 	---help---
 	  Provides RAID
 
 config SCSI
 	tristate "SCSI device support"
+	depends on BLOCK
 	---help---
 	  If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or
 	  any other SCSI device under Linux, say Y and make sure that you know
diff --git a/fs/Kconfig b/fs/Kconfig
index 4fd9efac29ab..1453d2d164f7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -4,6 +4,8 @@
 
 menu "File systems"
 
+if BLOCK
+
 config EXT2_FS
 	tristate "Second extended fs support"
 	help
@@ -399,6 +401,8 @@ config ROMFS_FS
 	  If you don't know whether you need it, then you don't need it:
 	  answer N.
 
+endif
+
 config INOTIFY
 	bool "Inotify file change notification support"
 	default y
@@ -530,6 +534,7 @@ config FUSE_FS
 	  If you want to develop a userspace FS, or if you want to use
 	  a filesystem based on FUSE, answer Y or M.
 
+if BLOCK
 menu "CD-ROM/DVD Filesystems"
 
 config ISO9660_FS
@@ -597,7 +602,9 @@ config UDF_NLS
 	depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y)
 
 endmenu
+endif
 
+if BLOCK
 menu "DOS/FAT/NT Filesystems"
 
 config FAT_FS
@@ -782,6 +789,7 @@ config NTFS_RW
 	  It is perfectly safe to say N here.
 
 endmenu
+endif
 
 menu "Pseudo filesystems"
 
@@ -939,7 +947,7 @@ menu "Miscellaneous filesystems"
 
 config ADFS_FS
 	tristate "ADFS file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  The Acorn Disc Filing System is the standard file system of the
 	  RiscOS operating system which runs on Acorn's ARM-based Risc PC
@@ -967,7 +975,7 @@ config ADFS_FS_RW
 
 config AFFS_FS
 	tristate "Amiga FFS file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  The Fast File System (FFS) is the common file system used on hard
 	  disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20).  Say Y
@@ -989,7 +997,7 @@ config AFFS_FS
 
 config HFS_FS
 	tristate "Apple Macintosh file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	select NLS
 	help
 	  If you say Y here, you will be able to mount Macintosh-formatted
@@ -1002,6 +1010,7 @@ config HFS_FS
 
 config HFSPLUS_FS
 	tristate "Apple Extended HFS file system support"
+	depends on BLOCK
 	select NLS
 	select NLS_UTF8
 	help
@@ -1015,7 +1024,7 @@ config HFSPLUS_FS
 
 config BEFS_FS
 	tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	select NLS
 	help
 	  The BeOS File System (BeFS) is the native file system of Be, Inc's
@@ -1042,7 +1051,7 @@ config BEFS_DEBUG
 
 config BFS_FS
 	tristate "BFS file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  Boot File System (BFS) is a file system used under SCO UnixWare to
 	  allow the bootloader access to the kernel image and other important
@@ -1064,7 +1073,7 @@ config BFS_FS
 
 config EFS_FS
 	tristate "EFS file system support (read only) (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  EFS is an older file system used for non-ISO9660 CD-ROMs and hard
 	  disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer
@@ -1079,7 +1088,7 @@ config EFS_FS
 
 config JFFS_FS
 	tristate "Journalling Flash File System (JFFS) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	help
 	  JFFS is the Journaling Flash File System developed by Axis
 	  Communications in Sweden, aimed at providing a crash/powerdown-safe
@@ -1264,6 +1273,7 @@ endchoice
 
 config CRAMFS
 	tristate "Compressed ROM file system support (cramfs)"
+	depends on BLOCK
 	select ZLIB_INFLATE
 	help
 	  Saying Y here includes support for CramFs (Compressed ROM File
@@ -1283,6 +1293,7 @@ config CRAMFS
 
 config VXFS_FS
 	tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)"
+	depends on BLOCK
 	help
 	  FreeVxFS is a file system driver that support the VERITAS VxFS(TM)
 	  file system format.  VERITAS VxFS(TM) is the standard file system
@@ -1300,6 +1311,7 @@ config VXFS_FS
 
 config HPFS_FS
 	tristate "OS/2 HPFS file system support"
+	depends on BLOCK
 	help
 	  OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
 	  is the file system used for organizing files on OS/2 hard disk
@@ -1316,6 +1328,7 @@ config HPFS_FS
 
 config QNX4FS_FS
 	tristate "QNX4 file system support (read only)"
+	depends on BLOCK
 	help
 	  This is the file system used by the real-time operating systems
 	  QNX 4 and QNX 6 (the latter is also called QNX RTP).
@@ -1343,6 +1356,7 @@ config QNX4FS_RW
 
 config SYSV_FS
 	tristate "System V/Xenix/V7/Coherent file system support"
+	depends on BLOCK
 	help
 	  SCO, Xenix and Coherent are commercial Unix systems for Intel
 	  machines, and Version 7 was used on the DEC PDP-11. Saying Y
@@ -1381,6 +1395,7 @@ config SYSV_FS
 
 config UFS_FS
 	tristate "UFS file system support (read only)"
+	depends on BLOCK
 	help
 	  BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD,
 	  OpenBSD and NeXTstep) use a file system called UFS. Some System V
@@ -1959,11 +1974,13 @@ config GENERIC_ACL
 
 endmenu
 
+if BLOCK
 menu "Partition Types"
 
 source "fs/partitions/Kconfig"
 
 endmenu
+endif
 
 source "fs/nls/Kconfig"
 
diff --git a/fs/Makefile b/fs/Makefile
index 46b8cfe497b2..a503e6ce0f32 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -5,12 +5,18 @@
 # Rewritten to use lists instead of if-statements.
 # 
 
-obj-y :=	open.o read_write.o file_table.o buffer.o  bio.o super.o \
-		block_dev.o char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
+obj-y :=	open.o read_write.o file_table.o super.o \
+		char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
-		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-		ioprio.o pnode.o drop_caches.o splice.o sync.o
+		seq_file.o xattr.o libfs.o fs-writeback.o \
+		pnode.o drop_caches.o splice.o sync.o
+
+ifeq ($(CONFIG_BLOCK),y)
+obj-y +=	buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
+else
+obj-y +=	no-block.o
+endif
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index e1a56437040a..64b34533edea 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -645,6 +645,7 @@ out:
 }
 #endif
 
+#ifdef CONFIG_BLOCK
 struct hd_geometry32 {
 	unsigned char heads;
 	unsigned char sectors;
@@ -869,6 +870,7 @@ static int sg_grt_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
 	}
 	return err;
 }
+#endif /* CONFIG_BLOCK */
 
 struct sock_fprog32 {
 	unsigned short	len;
@@ -992,6 +994,7 @@ static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
 }
 
 
+#ifdef CONFIG_BLOCK
 struct mtget32 {
 	compat_long_t	mt_type;
 	compat_long_t	mt_resid;
@@ -1164,6 +1167,7 @@ static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar
 
 	return err;
 }
+#endif /* CONFIG_BLOCK */
 
 #ifdef CONFIG_VT
 
@@ -1491,6 +1495,7 @@ ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
 	return -EINVAL;
 }
 
+#ifdef CONFIG_BLOCK
 static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
 	/* The mkswap binary hard codes it to Intel value :-((( */
@@ -1525,12 +1530,14 @@ static int blkpg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long ar
 
 	return sys_ioctl(fd, cmd, (unsigned long)a);
 }
+#endif
 
 static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
 	return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg);
 }
 
+#ifdef CONFIG_BLOCK
 /* Fix sizeof(sizeof()) breakage */
 #define BLKBSZGET_32   _IOR(0x12,112,int)
 #define BLKBSZSET_32   _IOW(0x12,113,int)
@@ -1551,6 +1558,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
 {
        return sys_ioctl(fd, BLKGETSIZE64, (unsigned long)compat_ptr(arg));
 }
+#endif
 
 /* Bluetooth ioctls */
 #define HCIUARTSETPROTO	_IOW('U', 200, int)
@@ -1571,6 +1579,7 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
 #define HIDPGETCONNLIST	_IOR('H', 210, int)
 #define HIDPGETCONNINFO	_IOR('H', 211, int)
 
+#ifdef CONFIG_BLOCK
 struct floppy_struct32 {
 	compat_uint_t	size;
 	compat_uint_t	sect;
@@ -1895,6 +1904,7 @@ out:
 	kfree(karg);
 	return err;
 }
+#endif
 
 struct mtd_oob_buf32 {
 	u_int32_t start;
@@ -1936,6 +1946,7 @@ static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg)
 	return err;
 }	
 
+#ifdef CONFIG_BLOCK
 struct raw32_config_request
 {
         compat_int_t    raw_minor;
@@ -2000,6 +2011,7 @@ static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
         }
         return ret;
 }
+#endif /* CONFIG_BLOCK */
 
 struct serial_struct32 {
         compat_int_t    type;
@@ -2606,6 +2618,7 @@ HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc)
 HANDLE_IOCTL(SIOCRTMSG, ret_einval)
 HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
 #endif
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
 HANDLE_IOCTL(BLKRAGET, w_long)
 HANDLE_IOCTL(BLKGETSIZE, w_long)
@@ -2631,14 +2644,17 @@ HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans)
 HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans)
 HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
 HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
+#endif
 HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans)
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans)
 HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans)
 HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans)
 HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans)
+#endif
 #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
 HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
 #ifdef CONFIG_VT
@@ -2677,12 +2693,14 @@ HANDLE_IOCTL(SONET_SETFRAMING, do_atm_ioctl)
 HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
 HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
 /* block stuff */
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget)
 HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset)
 HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64)
 /* Raw devices */
 HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
 HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
+#endif
 /* Serial */
 HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl)
 HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl)
diff --git a/fs/internal.h b/fs/internal.h
index f662b703bb97..f07147d63255 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -14,10 +14,16 @@
 /*
  * block_dev.c
  */
+#ifdef CONFIG_BLOCK
 extern struct super_block *blockdev_superblock;
 extern void __init bdev_cache_init(void);
 
 #define sb_is_blkdev_sb(sb) ((sb) == blockdev_superblock)
+#else
+static inline void bdev_cache_init(void) {}
+
+#define sb_is_blkdev_sb(sb) 0
+#endif
 
 /*
  * char_dev.c
diff --git a/fs/no-block.c b/fs/no-block.c
new file mode 100644
index 000000000000..d269a93d3467
--- /dev/null
+++ b/fs/no-block.c
@@ -0,0 +1,22 @@
+/* no-block.c: implementation of routines required for non-BLOCK configuration
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+
+static int no_blkdev_open(struct inode * inode, struct file * filp)
+{
+	return -ENODEV;
+}
+
+const struct file_operations def_blk_fops = {
+	.open		= no_blkdev_open,
+};
diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile
index d713ce6b3e12..67e665fdb7fc 100644
--- a/fs/partitions/Makefile
+++ b/fs/partitions/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y := check.o
+obj-$(CONFIG_BLOCK) := check.o
 
 obj-$(CONFIG_ACORN_PARTITION) += acorn.o
 obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 5bbd60896050..66bc425f2f3d 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -277,12 +277,15 @@ static int devinfo_show(struct seq_file *f, void *v)
 		if (i == 0)
 			seq_printf(f, "Character devices:\n");
 		chrdev_show(f, i);
-	} else {
+	}
+#ifdef CONFIG_BLOCK
+	else {
 		i -= CHRDEV_MAJOR_HASH_SIZE;
 		if (i == 0)
 			seq_printf(f, "\nBlock devices:\n");
 		blkdev_show(f, i);
 	}
+#endif
 	return 0;
 }
 
@@ -355,6 +358,7 @@ static int stram_read_proc(char *page, char **start, off_t off,
 }
 #endif
 
+#ifdef CONFIG_BLOCK
 extern struct seq_operations partitions_op;
 static int partitions_open(struct inode *inode, struct file *file)
 {
@@ -378,6 +382,7 @@ static struct file_operations proc_diskstats_operations = {
 	.llseek		= seq_lseek,
 	.release	= seq_release,
 };
+#endif
 
 #ifdef CONFIG_MODULES
 extern struct seq_operations modules_op;
@@ -695,7 +700,9 @@ void __init proc_misc_init(void)
 		entry->proc_fops = &proc_kmsg_operations;
 	create_seq_entry("devices", 0, &proc_devinfo_operations);
 	create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
+#ifdef CONFIG_BLOCK
 	create_seq_entry("partitions", 0, &proc_partitions_operations);
+#endif
 	create_seq_entry("stat", 0, &proc_stat_operations);
 	create_seq_entry("interrupts", 0, &proc_interrupts_operations);
 #ifdef CONFIG_SLAB
@@ -707,7 +714,9 @@ void __init proc_misc_init(void)
 	create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
 	create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
 	create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
+#ifdef CONFIG_BLOCK
 	create_seq_entry("diskstats", 0, &proc_diskstats_operations);
+#endif
 #ifdef CONFIG_MODULES
 	create_seq_entry("modules", 0, &proc_modules_operations);
 #endif
diff --git a/fs/quota.c b/fs/quota.c
index d6a2be826e29..b9dae76a0b6e 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -337,6 +337,34 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void
 	return 0;
 }
 
+/*
+ * look up a superblock on which quota ops will be performed
+ * - use the name of a block device to find the superblock thereon
+ */
+static inline struct super_block *quotactl_block(const char __user *special)
+{
+#ifdef CONFIG_BLOCK
+	struct block_device *bdev;
+	struct super_block *sb;
+	char *tmp = getname(special);
+
+	if (IS_ERR(tmp))
+		return ERR_PTR(PTR_ERR(tmp));
+	bdev = lookup_bdev(tmp);
+	putname(tmp);
+	if (IS_ERR(bdev))
+		return ERR_PTR(PTR_ERR(bdev));
+	sb = get_super(bdev);
+	bdput(bdev);
+	if (!sb)
+		return ERR_PTR(-ENODEV);
+
+	return sb;
+#else
+	return ERR_PTR(-ENODEV);
+#endif
+}
+
 /*
  * This is the system call interface. This communicates with
  * the user-level programs. Currently this only supports diskquota
@@ -347,25 +375,15 @@ asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t
 {
 	uint cmds, type;
 	struct super_block *sb = NULL;
-	struct block_device *bdev;
-	char *tmp;
 	int ret;
 
 	cmds = cmd >> SUBCMDSHIFT;
 	type = cmd & SUBCMDMASK;
 
 	if (cmds != Q_SYNC || special) {
-		tmp = getname(special);
-		if (IS_ERR(tmp))
-			return PTR_ERR(tmp);
-		bdev = lookup_bdev(tmp);
-		putname(tmp);
-		if (IS_ERR(bdev))
-			return PTR_ERR(bdev);
-		sb = get_super(bdev);
-		bdput(bdev);
-		if (!sb)
-			return -ENODEV;
+		sb = quotactl_block(special);
+		if (IS_ERR(sb))
+			return PTR_ERR(sb);
 	}
 
 	ret = check_quotactl_valid(sb, type, cmds, id);
diff --git a/fs/super.c b/fs/super.c
index 15671cd048b1..aec99ddbe53f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -571,8 +571,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 {
 	int retval;
 	
+#ifdef CONFIG_BLOCK
 	if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
 		return -EACCES;
+#endif
 	if (flags & MS_RDONLY)
 		acct_auto_close(sb);
 	shrink_dcache_sb(sb);
@@ -692,6 +694,7 @@ void kill_litter_super(struct super_block *sb)
 
 EXPORT_SYMBOL(kill_litter_super);
 
+#ifdef CONFIG_BLOCK
 static int set_bdev_super(struct super_block *s, void *data)
 {
 	s->s_bdev = data;
@@ -787,6 +790,7 @@ void kill_block_super(struct super_block *sb)
 }
 
 EXPORT_SYMBOL(kill_block_super);
+#endif
 
 int get_sb_nodev(struct file_system_type *fs_type,
 	int flags, void *data,
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 26b364c9d62c..35115bca036e 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -1,5 +1,6 @@
 config XFS_FS
 	tristate "XFS filesystem support"
+	depends on BLOCK
 	help
 	  XFS is a high performance journaling filesystem which originated
 	  on the SGI IRIX platform.  It is completely multi-threaded, can
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2c01a90998a7..3e36107d342a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -16,6 +16,22 @@
 
 #include <asm/scatterlist.h>
 
+#ifdef CONFIG_LBD
+# include <asm/div64.h>
+# define sector_div(a, b) do_div(a, b)
+#else
+# define sector_div(n, b)( \
+{ \
+	int _res; \
+	_res = (n) % (b); \
+	(n) /= (b); \
+	_res; \
+} \
+)
+#endif
+
+#ifdef CONFIG_BLOCK
+
 struct scsi_ioctl_command;
 
 struct request_queue;
@@ -818,24 +834,30 @@ struct work_struct;
 int kblockd_schedule_work(struct work_struct *work);
 void kblockd_flush(void);
 
-#ifdef CONFIG_LBD
-# include <asm/div64.h>
-# define sector_div(a, b) do_div(a, b)
-#else
-# define sector_div(n, b)( \
-{ \
-	int _res; \
-	_res = (n) % (b); \
-	(n) /= (b); \
-	_res; \
-} \
-)
-#endif 
-
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
 	MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
 	MODULE_ALIAS("block-major-" __stringify(major) "-*")
 
 
+#else /* CONFIG_BLOCK */
+/*
+ * stubs for when the block layer is configured out
+ */
+#define buffer_heads_over_limit 0
+
+static inline long blk_congestion_wait(int rw, long timeout)
+{
+	return timeout;
+}
+
+static inline long nr_blockdev_pages(void)
+{
+	return 0;
+}
+
+static inline void exit_io_context(void) {}
+
+#endif /* CONFIG_BLOCK */
+
 #endif
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 64b508e35d2a..131ffd37e716 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -14,6 +14,8 @@
 #include <linux/wait.h>
 #include <asm/atomic.h>
 
+#ifdef CONFIG_BLOCK
+
 enum bh_state_bits {
 	BH_Uptodate,	/* Contains valid data */
 	BH_Dirty,	/* Is dirty */
@@ -301,4 +303,18 @@ static inline void lock_buffer(struct buffer_head *bh)
 }
 
 extern int __set_page_dirty_buffers(struct page *page);
+
+#else /* CONFIG_BLOCK */
+
+static inline void buffer_init(void) {}
+static inline int try_to_free_buffers(struct page *page) { return 1; }
+static inline int sync_blockdev(struct block_device *bdev) { return 0; }
+static inline int inode_has_buffers(struct inode *inode) { return 0; }
+static inline void invalidate_inode_buffers(struct inode *inode) {}
+static inline int remove_inode_buffers(struct inode *inode) { return 1; }
+static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
+static inline void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers) {}
+
+
+#endif /* CONFIG_BLOCK */
 #endif /* _LINUX_BUFFER_HEAD_H */
diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h
index 98d40e08ba6e..d61ef5951538 100644
--- a/include/linux/compat_ioctl.h
+++ b/include/linux/compat_ioctl.h
@@ -90,6 +90,7 @@ COMPATIBLE_IOCTL(FDTWADDLE)
 COMPATIBLE_IOCTL(FDFMTTRK)
 COMPATIBLE_IOCTL(FDRAWCMD)
 /* 0x12 */
+#ifdef CONFIG_BLOCK
 COMPATIBLE_IOCTL(BLKRASET)
 COMPATIBLE_IOCTL(BLKROSET)
 COMPATIBLE_IOCTL(BLKROGET)
@@ -103,6 +104,7 @@ COMPATIBLE_IOCTL(BLKTRACESETUP)
 COMPATIBLE_IOCTL(BLKTRACETEARDOWN)
 ULONG_IOCTL(BLKRASET)
 ULONG_IOCTL(BLKFRASET)
+#endif
 /* RAID */
 COMPATIBLE_IOCTL(RAID_VERSION)
 COMPATIBLE_IOCTL(GET_ARRAY_INFO)
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 9c5a04f6114c..b3370ef5164d 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -3,6 +3,8 @@
 
 #include <linux/percpu.h>
 
+#ifdef CONFIG_BLOCK
+
 typedef int (elevator_merge_fn) (request_queue_t *, struct request **,
 				 struct bio *);
 
@@ -203,4 +205,5 @@ enum {
 	__val;							\
 })
 
+#endif /* CONFIG_BLOCK */
 #endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b73a47582dbe..5baf3a153403 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1482,6 +1482,7 @@ extern void __init vfs_caches_init(unsigned long);
 extern void putname(const char *name);
 #endif
 
+#ifdef CONFIG_BLOCK
 extern int register_blkdev(unsigned int, const char *);
 extern int unregister_blkdev(unsigned int, const char *);
 extern struct block_device *bdget(dev_t);
@@ -1490,11 +1491,15 @@ extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
 extern struct block_device *open_by_devnum(dev_t, unsigned);
 extern struct block_device *open_partition_by_devnum(dev_t, unsigned);
-extern const struct file_operations def_blk_fops;
 extern const struct address_space_operations def_blk_aops;
+#else
+static inline void bd_forget(struct inode *inode) {}
+#endif
+extern const struct file_operations def_blk_fops;
 extern const struct file_operations def_chr_fops;
 extern const struct file_operations bad_sock_fops;
 extern const struct file_operations def_fifo_fops;
+#ifdef CONFIG_BLOCK
 extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
 extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long);
 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
@@ -1510,6 +1515,7 @@ extern void bd_release_from_disk(struct block_device *, struct gendisk *);
 #define bd_claim_by_disk(bdev, holder, disk)	bd_claim(bdev, holder)
 #define bd_release_from_disk(bdev, disk)	bd_release(bdev)
 #endif
+#endif
 
 /* fs/char_dev.c */
 #define CHRDEV_MAJOR_HASH_SIZE	255
@@ -1523,14 +1529,19 @@ extern int chrdev_open(struct inode *, struct file *);
 extern void chrdev_show(struct seq_file *,off_t);
 
 /* fs/block_dev.c */
-#define BLKDEV_MAJOR_HASH_SIZE	255
 #define BDEVNAME_SIZE	32	/* Largest string for a blockdev identifier */
+
+#ifdef CONFIG_BLOCK
+#define BLKDEV_MAJOR_HASH_SIZE	255
 extern const char *__bdevname(dev_t, char *buffer);
 extern const char *bdevname(struct block_device *bdev, char *buffer);
 extern struct block_device *lookup_bdev(const char *);
 extern struct block_device *open_bdev_excl(const char *, int, void *);
 extern void close_bdev_excl(struct block_device *);
 extern void blkdev_show(struct seq_file *,off_t);
+#else
+#define BLKDEV_MAJOR_HASH_SIZE	0
+#endif
 
 extern void init_special_inode(struct inode *, umode_t, dev_t);
 
@@ -1544,6 +1555,7 @@ extern const struct file_operations rdwr_fifo_fops;
 
 extern int fs_may_remount_ro(struct super_block *);
 
+#ifdef CONFIG_BLOCK
 /*
  * return READ, READA, or WRITE
  */
@@ -1555,9 +1567,10 @@ extern int fs_may_remount_ro(struct super_block *);
 #define bio_data_dir(bio)	((bio)->bi_rw & 1)
 
 extern int check_disk_change(struct block_device *);
-extern int invalidate_inodes(struct super_block *);
 extern int __invalidate_device(struct block_device *);
 extern int invalidate_partition(struct gendisk *, int);
+#endif
+extern int invalidate_inodes(struct super_block *);
 unsigned long invalidate_mapping_pages(struct address_space *mapping,
 					pgoff_t start, pgoff_t end);
 unsigned long invalidate_inode_pages(struct address_space *mapping);
@@ -1590,7 +1603,9 @@ extern void emergency_sync(void);
 extern void emergency_remount(void);
 extern int do_remount_sb(struct super_block *sb, int flags,
 			 void *data, int force);
+#ifdef CONFIG_BLOCK
 extern sector_t bmap(struct inode *, sector_t);
+#endif
 extern int notify_change(struct dentry *, struct iattr *);
 extern int permission(struct inode *, int, struct nameidata *);
 extern int generic_permission(struct inode *, int,
@@ -1673,9 +1688,11 @@ static inline void insert_inode_hash(struct inode *inode) {
 extern struct file * get_empty_filp(void);
 extern void file_move(struct file *f, struct list_head *list);
 extern void file_kill(struct file *f);
+#ifdef CONFIG_BLOCK
 struct bio;
 extern void submit_bio(int, struct bio *);
 extern int bdev_read_only(struct block_device *);
+#endif
 extern int set_blocksize(struct block_device *, int);
 extern int sb_set_blocksize(struct super_block *, int);
 extern int sb_min_blocksize(struct super_block *, int);
@@ -1756,6 +1773,7 @@ static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
 				actor);
 }
 
+#ifdef CONFIG_BLOCK
 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset,
 	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
@@ -1793,6 +1811,7 @@ static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb,
 	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
 				nr_segs, get_block, end_io, DIO_OWN_LOCKING);
 }
+#endif
 
 extern const struct file_operations generic_ro_fops;
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index e4af57e87c17..41f276fdd185 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -11,6 +11,8 @@
 
 #include <linux/types.h>
 
+#ifdef CONFIG_BLOCK
+
 enum {
 /* These three have identical behaviour; use the second one if DOS FDISK gets
    confused about extended/logical partitions starting past cylinder 1023. */
@@ -420,3 +422,5 @@ static inline struct block_device *bdget_disk(struct gendisk *disk, int index)
 #endif
 
 #endif
+
+#endif
diff --git a/include/linux/mpage.h b/include/linux/mpage.h
index 517c098fde20..cc5fb75af78a 100644
--- a/include/linux/mpage.h
+++ b/include/linux/mpage.h
@@ -9,6 +9,7 @@
  * (And no, it doesn't do the #ifdef __MPAGE_H thing, and it doesn't do
  * nested includes.  Get it right in the .c file).
  */
+#ifdef CONFIG_BLOCK
 
 struct writeback_control;
 typedef int (writepage_t)(struct page *page, struct writeback_control *wbc);
@@ -20,3 +21,5 @@ int mpage_writepages(struct address_space *mapping,
 		struct writeback_control *wbc, get_block_t get_block);
 int mpage_writepage(struct page *page, get_block_t *get_block,
 		struct writeback_control *wbc);
+
+#endif
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index eb3e547c8fee..c588709acbbc 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -53,6 +53,8 @@
 #include <linux/raid/md_u.h>
 #include <linux/raid/md_k.h>
 
+#ifdef CONFIG_MD
+
 /*
  * Different major versions are not compatible.
  * Different minor versions are only downward compatible.
@@ -95,5 +97,6 @@ extern void md_new_event(mddev_t *mddev);
 
 extern void md_update_sb(mddev_t * mddev);
 
+#endif /* CONFIG_MD */
 #endif 
 
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index d28890295852..920b94fe31fa 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -18,6 +18,8 @@
 /* and dm-bio-list.h is not under include/linux because.... ??? */
 #include "../../../drivers/md/dm-bio-list.h"
 
+#ifdef CONFIG_BLOCK
+
 #define	LEVEL_MULTIPATH		(-4)
 #define	LEVEL_LINEAR		(-1)
 #define	LEVEL_FAULTY		(-5)
@@ -362,5 +364,6 @@ static inline void safe_put_page(struct page *p)
 	if (p) put_page(p);
 }
 
+#endif /* CONFIG_BLOCK */
 #endif
 
diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index bbf66219b769..c247a28259bc 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h
@@ -6,7 +6,6 @@
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 
-
 #define MSG_SIMPLE_TAG	0x20
 #define MSG_HEAD_TAG	0x21
 #define MSG_ORDERED_TAG	0x22
@@ -14,6 +13,7 @@
 #define SCSI_NO_TAG	(-1)    /* identify no tag in use */
 
 
+#ifdef CONFIG_BLOCK
 
 /**
  * scsi_get_tag_type - get the type of tag the device supports
@@ -144,4 +144,5 @@ static inline int scsi_init_shared_tag_map(struct Scsi_Host *shost, int depth)
 	return shost->bqt ? 0 : -ENOMEM;
 }
 
+#endif /* CONFIG_BLOCK */
 #endif /* _SCSI_SCSI_TCQ_H */
diff --git a/init/Kconfig b/init/Kconfig
index 4381006dd666..d2eb7a84a264 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -92,7 +92,7 @@ config LOCALVERSION_AUTO
 
 config SWAP
 	bool "Support for paging of anonymous memory (swap)"
-	depends on MMU
+	depends on MMU && BLOCK
 	default y
 	help
 	  This option allows you to choose whether you want to have support
diff --git a/init/do_mounts.c b/init/do_mounts.c
index b290aadb1d3f..dc1ec0803ef9 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -285,7 +285,11 @@ void __init mount_block_root(char *name, int flags)
 {
 	char *fs_names = __getname();
 	char *p;
+#ifdef CONFIG_BLOCK
 	char b[BDEVNAME_SIZE];
+#else
+	const char *b = name;
+#endif
 
 	get_fs_names(fs_names);
 retry:
@@ -304,7 +308,9 @@ retry:
 		 * Allow the user to distinguish between failed sys_open
 		 * and bad superblock on root device.
 		 */
+#ifdef CONFIG_BLOCK
 		__bdevname(ROOT_DEV, b);
+#endif
 		printk("VFS: Cannot open root device \"%s\" or %s\n",
 				root_device_name, b);
 		printk("Please append a correct \"root=\" boot option\n");
@@ -316,7 +322,10 @@ retry:
 	for (p = fs_names; *p; p += strlen(p)+1)
 		printk(" %s", p);
 	printk("\n");
-	panic("VFS: Unable to mount root fs on %s", __bdevname(ROOT_DEV, b));
+#ifdef CONFIG_BLOCK
+	__bdevname(ROOT_DEV, b);
+#endif
+	panic("VFS: Unable to mount root fs on %s", b);
 out:
 	putname(fs_names);
 }
@@ -387,8 +396,10 @@ void __init mount_root(void)
 			change_floppy("root floppy");
 	}
 #endif
+#ifdef CONFIG_BLOCK
 	create_dev("/dev/root", ROOT_DEV);
 	mount_block_root("/dev/root", root_mountflags);
+#endif
 }
 
 /*
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 6991bece67e8..7a3b2e75f040 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -134,3 +134,8 @@ cond_syscall(sys_madvise);
 cond_syscall(sys_mremap);
 cond_syscall(sys_remap_file_pages);
 cond_syscall(compat_sys_move_pages);
+
+/* block-layer dependent */
+cond_syscall(sys_bdflush);
+cond_syscall(sys_ioprio_set);
+cond_syscall(sys_ioprio_get);
diff --git a/mm/Makefile b/mm/Makefile
index 4f2166a833b9..12b3a4eee88d 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -12,7 +12,7 @@ obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   readahead.o swap.o truncate.o vmscan.o \
 			   prio_tree.o util.o mmzone.o vmstat.o $(mmu-y)
 
-ifeq ($(CONFIG_MMU),y)
+ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy)
 obj-y			+= bounce.o
 endif
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
diff --git a/mm/filemap.c b/mm/filemap.c
index d6846de08887..c4fe97f5ace0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2020,6 +2020,7 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
 		if (unlikely(*pos + *count > inode->i_sb->s_maxbytes))
 			*count = inode->i_sb->s_maxbytes - *pos;
 	} else {
+#ifdef CONFIG_BLOCK
 		loff_t isize;
 		if (bdev_read_only(I_BDEV(inode)))
 			return -EPERM;
@@ -2031,6 +2032,9 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
 
 		if (*pos + *count > isize)
 			*count = isize - *pos;
+#else
+		return -EPERM;
+#endif
 	}
 	return 0;
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index 7f50e3ff54cd..ba2453f9483d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -409,6 +409,7 @@ int migrate_page(struct address_space *mapping,
 }
 EXPORT_SYMBOL(migrate_page);
 
+#ifdef CONFIG_BLOCK
 /*
  * Migration function for pages with buffers. This function can only be used
  * if the underlying filesystem guarantees that no other references to "page"
@@ -466,6 +467,7 @@ int buffer_migrate_page(struct address_space *mapping,
 	return 0;
 }
 EXPORT_SYMBOL(buffer_migrate_page);
+#endif
 
 /*
  * Writeback a page to clean the dirty state
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ecf27839c203..c0d4ce144dec 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -807,9 +807,11 @@ int fastcall set_page_dirty(struct page *page)
 
 	if (likely(mapping)) {
 		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
-		if (spd)
-			return (*spd)(page);
-		return __set_page_dirty_buffers(page);
+#ifdef CONFIG_BLOCK
+		if (!spd)
+			spd = __set_page_dirty_buffers;
+#endif
+		return (*spd)(page);
 	}
 	if (!PageDirty(page)) {
 		if (!TestSetPageDirty(page))
diff --git a/mm/truncate.c b/mm/truncate.c
index cd3e34b816db..8fde6580657e 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -35,8 +35,10 @@ void do_invalidatepage(struct page *page, unsigned long offset)
 {
 	void (*invalidatepage)(struct page *, unsigned long);
 	invalidatepage = page->mapping->a_ops->invalidatepage;
+#ifdef CONFIG_BLOCK
 	if (!invalidatepage)
 		invalidatepage = block_invalidatepage;
+#endif
 	if (invalidatepage)
 		(*invalidatepage)(page, offset);
 }
-- 
cgit v1.2.3


From bcfd8d36151e531e1c6c731f1fbf792509a1c494 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 31 Aug 2006 12:56:06 +0200
Subject: [PATCH] CONFIG_BLOCK: blk_congestion_wait() fix

Don't just do nothing: it'll cause busywaits all over writeback and page
reclaim.

For now, take a fixed-length nap.  Will improve when NFS starts waking up
throttled processes.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3e36107d342a..1d79b8d4ca6d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_BLKDEV_H
 #define _LINUX_BLKDEV_H
 
+#include <linux/sched.h>
 #include <linux/major.h>
 #include <linux/genhd.h>
 #include <linux/list.h>
@@ -848,7 +849,7 @@ void kblockd_flush(void);
 
 static inline long blk_congestion_wait(int rw, long timeout)
 {
-	return timeout;
+	return io_schedule_timeout(timeout);
 }
 
 static inline long nr_blockdev_pages(void)
@@ -856,7 +857,9 @@ static inline long nr_blockdev_pages(void)
 	return 0;
 }
 
-static inline void exit_io_context(void) {}
+static inline void exit_io_context(void)
+{
+}
 
 #endif /* CONFIG_BLOCK */
 
-- 
cgit v1.2.3