From fa3959f457109cc7d082b86ea6daae927982815b Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Thu, 24 Apr 2008 01:27:02 +0200
Subject: mv643xx_eth: get rid of static variables, allow multiple instances

Move mv643xx_eth's static state (ethernet register block base address
and MII management interface spinlock) into a struct hanging off the
shared platform device.  This is necessary to support chips that
contain multiple mv643xx_eth silicon blocks.

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Acked-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Dale Farnsworth <dale@farnsworth.org>
---
 include/linux/mv643xx_eth.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h
index 30e11aa3c1c9..2d59855b61c1 100644
--- a/include/linux/mv643xx_eth.h
+++ b/include/linux/mv643xx_eth.h
@@ -1,6 +1,7 @@
 /*
  * MV-643XX ethernet platform device data definition file.
  */
+
 #ifndef __LINUX_MV643XX_ETH_H
 #define __LINUX_MV643XX_ETH_H
 
@@ -13,7 +14,9 @@
 #define MV643XX_ETH_BASE_ADDR_ENABLE_REG	0x2290
 
 struct mv643xx_eth_platform_data {
+	struct platform_device	*shared;
 	int		port_number;
+
 	u16		force_phy_addr;	/* force override if phy_addr == 0 */
 	u16		phy_addr;
 
-- 
cgit v1.2.3


From f2ce825d2a89b30af14fa577298fecaab7bc9504 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Thu, 24 Apr 2008 01:27:17 +0200
Subject: mv643xx_eth: mbus decode window support

Make it possible to pass mbus_dram_target_info to the mv643xx_eth
driver via the platform data, and make the mv643xx_eth driver
program the window registers based on this data if it is passed in.

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Reviewed-by: Tzachi Perelstein <tzachi@marvell.com>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Dale Farnsworth <dale@farnsworth.org>
---
 drivers/net/mv643xx_eth.c   | 51 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mv643xx_eth.h |  6 ++++++
 2 files changed, 57 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index eebf0d288e36..aabf1c60946d 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -91,6 +91,11 @@
  */
 #define PHY_ADDR_REG				0x0000
 #define SMI_REG					0x0004
+#define WINDOW_BASE(i)				(0x0200 + ((i) << 3))
+#define WINDOW_SIZE(i)				(0x0204 + ((i) << 3))
+#define WINDOW_REMAP_HIGH(i)			(0x0280 + ((i) << 2))
+#define WINDOW_BAR_ENABLE			0x0290
+#define WINDOW_PROTECT(i)			(0x0294 + ((i) << 4))
 
 /*
  * Per-port registers.
@@ -512,6 +517,8 @@ struct mv643xx_shared_private {
 
 	/* used to protect SMI_REG, which is shared across ports */
 	spinlock_t phy_lock;
+
+	u32 win_protect;
 };
 
 struct mv643xx_private {
@@ -1888,6 +1895,9 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
 	mp->shared = platform_get_drvdata(pd->shared);
 	port_num = mp->port_num = pd->port_number;
 
+	if (mp->shared->win_protect)
+		wrl(mp, WINDOW_PROTECT(port_num), mp->shared->win_protect);
+
 	/* set default config values */
 	eth_port_uc_addr_get(mp, dev->dev_addr);
 	mp->rx_ring_size = PORT_DEFAULT_RECEIVE_QUEUE_SIZE;
@@ -1992,9 +2002,44 @@ static int mv643xx_eth_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static void mv643xx_eth_conf_mbus_windows(struct mv643xx_shared_private *msp,
+					  struct mbus_dram_target_info *dram)
+{
+	void __iomem *base = msp->eth_base;
+	u32 win_enable;
+	u32 win_protect;
+	int i;
+
+	for (i = 0; i < 6; i++) {
+		writel(0, base + WINDOW_BASE(i));
+		writel(0, base + WINDOW_SIZE(i));
+		if (i < 4)
+			writel(0, base + WINDOW_REMAP_HIGH(i));
+	}
+
+	win_enable = 0x3f;
+	win_protect = 0;
+
+	for (i = 0; i < dram->num_cs; i++) {
+		struct mbus_dram_window *cs = dram->cs + i;
+
+		writel((cs->base & 0xffff0000) |
+			(cs->mbus_attr << 8) |
+			dram->mbus_dram_target_id, base + WINDOW_BASE(i));
+		writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
+
+		win_enable &= ~(1 << i);
+		win_protect |= 3 << (2 * i);
+	}
+
+	writel(win_enable, base + WINDOW_BAR_ENABLE);
+	msp->win_protect = win_protect;
+}
+
 static int mv643xx_eth_shared_probe(struct platform_device *pdev)
 {
 	static int mv643xx_version_printed = 0;
+	struct mv643xx_eth_shared_platform_data *pd = pdev->dev.platform_data;
 	struct mv643xx_shared_private *msp;
 	struct resource *res;
 	int ret;
@@ -2021,6 +2066,12 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, msp);
 
+	/*
+	 * (Re-)program MBUS remapping windows if we are asked to.
+	 */
+	if (pd != NULL && pd->dram != NULL)
+		mv643xx_eth_conf_mbus_windows(msp, pd->dram);
+
 	return 0;
 
 out_free:
diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h
index 2d59855b61c1..4801b02b444e 100644
--- a/include/linux/mv643xx_eth.h
+++ b/include/linux/mv643xx_eth.h
@@ -5,6 +5,8 @@
 #ifndef __LINUX_MV643XX_ETH_H
 #define __LINUX_MV643XX_ETH_H
 
+#include <linux/mbus.h>
+
 #define MV643XX_ETH_SHARED_NAME		"mv643xx_eth_shared"
 #define MV643XX_ETH_NAME		"mv643xx_eth"
 #define MV643XX_ETH_SHARED_REGS		0x2000
@@ -13,6 +15,10 @@
 #define MV643XX_ETH_SIZE_REG_4		0x2224
 #define MV643XX_ETH_BASE_ADDR_ENABLE_REG	0x2290
 
+struct mv643xx_eth_shared_platform_data {
+	struct mbus_dram_target_info	*dram;
+};
+
 struct mv643xx_eth_platform_data {
 	struct platform_device	*shared;
 	int		port_number;
-- 
cgit v1.2.3


From c416a41f99be190e1f558cb06f70ddd560ce8b4b Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Thu, 24 Apr 2008 01:27:32 +0200
Subject: mv643xx_eth: configurable t_clk

Make t_clk configurable via platform device data (with the current
hardcoded value, 133 MHz, being the default), as it varies across
different chip families.

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Acked-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Dale Farnsworth <dale@farnsworth.org>
---
 drivers/net/mv643xx_eth.c   | 17 +++++++++--------
 include/linux/mv643xx_eth.h |  1 +
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index aabf1c60946d..8bd41e4e88a9 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -519,6 +519,8 @@ struct mv643xx_shared_private {
 	spinlock_t phy_lock;
 
 	u32 win_protect;
+
+	unsigned int t_clk;
 };
 
 struct mv643xx_private {
@@ -1129,7 +1131,6 @@ static irqreturn_t mv643xx_eth_int_handler(int irq, void *dev_id)
  *
  * INPUT:
  *	struct mv643xx_private *mp	Ethernet port
- *	unsigned int t_clk		t_clk of the MV-643xx chip in HZ units
  *	unsigned int delay		Delay in usec
  *
  * OUTPUT:
@@ -1140,10 +1141,10 @@ static irqreturn_t mv643xx_eth_int_handler(int irq, void *dev_id)
  *
  */
 static unsigned int eth_port_set_rx_coal(struct mv643xx_private *mp,
-					unsigned int t_clk, unsigned int delay)
+					unsigned int delay)
 {
 	unsigned int port_num = mp->port_num;
-	unsigned int coal = ((t_clk / 1000000) * delay) / 64;
+	unsigned int coal = ((mp->shared->t_clk / 1000000) * delay) / 64;
 
 	/* Set RX Coalescing mechanism */
 	wrl(mp, SDMA_CONFIG_REG(port_num),
@@ -1168,7 +1169,6 @@ static unsigned int eth_port_set_rx_coal(struct mv643xx_private *mp,
  *
  * INPUT:
  *	struct mv643xx_private *mp	Ethernet port
- *	unsigned int t_clk		t_clk of the MV-643xx chip in HZ units
  *	unsigned int delay		Delay in uSeconds
  *
  * OUTPUT:
@@ -1179,9 +1179,9 @@ static unsigned int eth_port_set_rx_coal(struct mv643xx_private *mp,
  *
  */
 static unsigned int eth_port_set_tx_coal(struct mv643xx_private *mp,
-					unsigned int t_clk, unsigned int delay)
+					unsigned int delay)
 {
-	unsigned int coal = ((t_clk / 1000000) * delay) / 64;
+	unsigned int coal = ((mp->shared->t_clk / 1000000) * delay) / 64;
 
 	/* Set TX Coalescing mechanism */
 	wrl(mp, TX_FIFO_URGENT_THRESHOLD_REG(mp->port_num), coal << 4);
@@ -1423,11 +1423,11 @@ static int mv643xx_eth_open(struct net_device *dev)
 
 #ifdef MV643XX_COAL
 	mp->rx_int_coal =
-		eth_port_set_rx_coal(mp, 133000000, MV643XX_RX_COAL);
+		eth_port_set_rx_coal(mp, MV643XX_RX_COAL);
 #endif
 
 	mp->tx_int_coal =
-		eth_port_set_tx_coal(mp, 133000000, MV643XX_TX_COAL);
+		eth_port_set_tx_coal(mp, MV643XX_TX_COAL);
 
 	/* Unmask phy and link status changes interrupts */
 	wrl(mp, INTERRUPT_EXTEND_MASK_REG(port_num), ETH_INT_UNMASK_ALL_EXT);
@@ -2063,6 +2063,7 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev)
 		goto out_free;
 
 	spin_lock_init(&msp->phy_lock);
+	msp->t_clk = (pd != NULL && pd->t_clk != 0) ? pd->t_clk : 133000000;
 
 	platform_set_drvdata(pdev, msp);
 
diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h
index 4801b02b444e..9f3a6032ff2e 100644
--- a/include/linux/mv643xx_eth.h
+++ b/include/linux/mv643xx_eth.h
@@ -17,6 +17,7 @@
 
 struct mv643xx_eth_shared_platform_data {
 	struct mbus_dram_target_info	*dram;
+	unsigned int	t_clk;
 };
 
 struct mv643xx_eth_platform_data {
-- 
cgit v1.2.3


From 240e4419e0cfcba737883b637ec2bdcc071ea03d Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Thu, 24 Apr 2008 01:27:44 +0200
Subject: mv643xx_eth: shorten shared platform driver name

Change the MV643XX_ETH_SHARED_NAME platform driver name to something
shorter than 19 characters, so that we can register multiple instances
(otherwise we end up with sysfs conflicts, since all instances will map
to "mv643xx_eth_shared." as there is a 20-char sysfs file name limit).

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Acked-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Dale Farnsworth <dale@farnsworth.org>
---
 include/linux/mv643xx_eth.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h
index 9f3a6032ff2e..66dc9571922a 100644
--- a/include/linux/mv643xx_eth.h
+++ b/include/linux/mv643xx_eth.h
@@ -7,8 +7,8 @@
 
 #include <linux/mbus.h>
 
-#define MV643XX_ETH_SHARED_NAME		"mv643xx_eth_shared"
-#define MV643XX_ETH_NAME		"mv643xx_eth"
+#define MV643XX_ETH_SHARED_NAME		"mv643xx_eth"
+#define MV643XX_ETH_NAME		"mv643xx_eth_port"
 #define MV643XX_ETH_SHARED_REGS		0x2000
 #define MV643XX_ETH_SHARED_REGS_SIZE	0x2000
 #define MV643XX_ETH_BAR_4		0x2220
-- 
cgit v1.2.3


From ce4e2e4558903ef92edf1ab4e09b0b338a09fd61 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Thu, 24 Apr 2008 01:29:59 +0200
Subject: mv643xx_eth: inter-mv643xx SMI port sharing

There exist chips with up to four mv643xx_eth silicon blocks but
only one external SMI (MII management) interface -- the SMI logic
of the first block is shared by all the blocks.

Handle this by allowing a per-port override of which
mv643xx_eth_shared's SMI registers (and spinlock) to use.

Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Acked-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Dale Farnsworth <dale@farnsworth.org>
---
 drivers/net/mv643xx_eth.c   | 38 ++++++++++++++++++++++----------------
 include/linux/mv643xx_eth.h |  2 ++
 2 files changed, 24 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 8bd41e4e88a9..b7915cdcc6a5 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -527,6 +527,8 @@ struct mv643xx_private {
 	struct mv643xx_shared_private *shared;
 	int port_num;			/* User Ethernet port number	*/
 
+	struct mv643xx_shared_private *shared_smi;
+
 	u32 rx_sram_addr;		/* Base address of rx sram area */
 	u32 rx_sram_size;		/* Size of rx sram area		*/
 	u32 tx_sram_addr;		/* Base address of tx sram area */
@@ -1898,6 +1900,10 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
 	if (mp->shared->win_protect)
 		wrl(mp, WINDOW_PROTECT(port_num), mp->shared->win_protect);
 
+	mp->shared_smi = mp->shared;
+	if (pd->shared_smi != NULL)
+		mp->shared_smi = platform_get_drvdata(pd->shared_smi);
+
 	/* set default config values */
 	eth_port_uc_addr_get(mp, dev->dev_addr);
 	mp->rx_ring_size = PORT_DEFAULT_RECEIVE_QUEUE_SIZE;
@@ -2986,15 +2992,16 @@ static void eth_port_reset(struct mv643xx_private *mp)
 static void eth_port_read_smi_reg(struct mv643xx_private *mp,
 				unsigned int phy_reg, unsigned int *value)
 {
+	void __iomem *smi_reg = mp->shared_smi->eth_base + SMI_REG;
 	int phy_addr = ethernet_phy_get(mp);
 	unsigned long flags;
 	int i;
 
 	/* the SMI register is a shared resource */
-	spin_lock_irqsave(&mp->shared->phy_lock, flags);
+	spin_lock_irqsave(&mp->shared_smi->phy_lock, flags);
 
 	/* wait for the SMI register to become available */
-	for (i = 0; rdl(mp, SMI_REG) & ETH_SMI_BUSY; i++) {
+	for (i = 0; readl(smi_reg) & ETH_SMI_BUSY; i++) {
 		if (i == PHY_WAIT_ITERATIONS) {
 			printk("%s: PHY busy timeout\n", mp->dev->name);
 			goto out;
@@ -3002,11 +3009,11 @@ static void eth_port_read_smi_reg(struct mv643xx_private *mp,
 		udelay(PHY_WAIT_MICRO_SECONDS);
 	}
 
-	wrl(mp, SMI_REG,
-		(phy_addr << 16) | (phy_reg << 21) | ETH_SMI_OPCODE_READ);
+	writel((phy_addr << 16) | (phy_reg << 21) | ETH_SMI_OPCODE_READ,
+		smi_reg);
 
 	/* now wait for the data to be valid */
-	for (i = 0; !(rdl(mp, SMI_REG) & ETH_SMI_READ_VALID); i++) {
+	for (i = 0; !(readl(smi_reg) & ETH_SMI_READ_VALID); i++) {
 		if (i == PHY_WAIT_ITERATIONS) {
 			printk("%s: PHY read timeout\n", mp->dev->name);
 			goto out;
@@ -3014,9 +3021,9 @@ static void eth_port_read_smi_reg(struct mv643xx_private *mp,
 		udelay(PHY_WAIT_MICRO_SECONDS);
 	}
 
-	*value = rdl(mp, SMI_REG) & 0xffff;
+	*value = readl(smi_reg) & 0xffff;
 out:
-	spin_unlock_irqrestore(&mp->shared->phy_lock, flags);
+	spin_unlock_irqrestore(&mp->shared_smi->phy_lock, flags);
 }
 
 /*
@@ -3042,17 +3049,16 @@ out:
 static void eth_port_write_smi_reg(struct mv643xx_private *mp,
 				   unsigned int phy_reg, unsigned int value)
 {
-	int phy_addr;
-	int i;
+	void __iomem *smi_reg = mp->shared_smi->eth_base + SMI_REG;
+	int phy_addr = ethernet_phy_get(mp);
 	unsigned long flags;
-
-	phy_addr = ethernet_phy_get(mp);
+	int i;
 
 	/* the SMI register is a shared resource */
-	spin_lock_irqsave(&mp->shared->phy_lock, flags);
+	spin_lock_irqsave(&mp->shared_smi->phy_lock, flags);
 
 	/* wait for the SMI register to become available */
-	for (i = 0; rdl(mp, SMI_REG) & ETH_SMI_BUSY; i++) {
+	for (i = 0; readl(smi_reg) & ETH_SMI_BUSY; i++) {
 		if (i == PHY_WAIT_ITERATIONS) {
 			printk("%s: PHY busy timeout\n", mp->dev->name);
 			goto out;
@@ -3060,10 +3066,10 @@ static void eth_port_write_smi_reg(struct mv643xx_private *mp,
 		udelay(PHY_WAIT_MICRO_SECONDS);
 	}
 
-	wrl(mp, SMI_REG, (phy_addr << 16) | (phy_reg << 21) |
-				ETH_SMI_OPCODE_WRITE | (value & 0xffff));
+	writel((phy_addr << 16) | (phy_reg << 21) |
+		ETH_SMI_OPCODE_WRITE | (value & 0xffff), smi_reg);
 out:
-	spin_unlock_irqrestore(&mp->shared->phy_lock, flags);
+	spin_unlock_irqrestore(&mp->shared_smi->phy_lock, flags);
 }
 
 /*
diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h
index 66dc9571922a..a15cdd4a8e58 100644
--- a/include/linux/mv643xx_eth.h
+++ b/include/linux/mv643xx_eth.h
@@ -24,6 +24,8 @@ struct mv643xx_eth_platform_data {
 	struct platform_device	*shared;
 	int		port_number;
 
+	struct platform_device	*shared_smi;
+
 	u16		force_phy_addr;	/* force override if phy_addr == 0 */
 	u16		phy_addr;
 
-- 
cgit v1.2.3


From 98db6f193c93e9b4729215af2c9101210e11d26c Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Tue, 29 Apr 2008 22:38:48 +0200
Subject: x86: fix section mismatch in pci_scan_bus

Fix following section mismatch warning:
WARNING: vmlinux.o(.text+0x275616): Section mismatch in reference from the function pci_scan_bus() to the function .devinit.text:pci_scan_bus_parented()

The warning was seen with a CONFIG_DEBUG_SECTION_MISMATCH=y build.
The inline function pci_scan_bus refers to functions annotated
__devinit - so annotate it __devinit too.
This revealed a few x86 specific functions that were only
used from __init or __devinit context.
So annotate these __devinit and the warning was killed.

The added include in pci.h was not strictly required but
added to avoid being dependent on indirect includes.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Jesse Barnes <jbarnes@hobbes.lan>
---
 arch/x86/pci/common.c | 4 ++--
 include/linux/pci.h   | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 2a4d751818b7..88b5416cf009 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -489,7 +489,7 @@ void pcibios_disable_device (struct pci_dev *dev)
 		pcibios_disable_irq(dev);
 }
 
-struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
+struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
 {
 	struct pci_bus *bus = NULL;
 	struct pci_sysdata *sd;
@@ -512,7 +512,7 @@ struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
 	return bus;
 }
 
-struct pci_bus *pci_scan_bus_with_sysdata(int busno)
+struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno)
 {
 	return pci_scan_bus_on_node(busno, &pci_root_ops, -1);
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 96acd0dae241..a59517b4930f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -44,6 +44,7 @@
 #include <linux/mod_devicetable.h>
 
 #include <linux/types.h>
+#include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/list.h>
 #include <linux/compiler.h>
@@ -474,7 +475,7 @@ extern struct pci_bus *pci_find_bus(int domain, int busnr);
 void pci_bus_add_devices(struct pci_bus *bus);
 struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus,
 				      struct pci_ops *ops, void *sysdata);
-static inline struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops,
+static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops,
 					   void *sysdata)
 {
 	struct pci_bus *root_bus;
-- 
cgit v1.2.3


From 70b9f7dc1435412ca2b89b13a8353bd9915a7189 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel.send@gmail.com>
Date: Mon, 28 Apr 2008 16:27:23 -0700
Subject: x86/pci: remove flag in pci_cfg_space_size_ext

Remove the check_exp_pcix flag from pci_cfg_space_size_ext() and let
pci_cfg_space_size() call it directly, without a flag.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/fixup.c |  2 +-
 drivers/pci/probe.c  | 33 +++++++++++++++++----------------
 include/linux/pci.h  |  2 +-
 3 files changed, 19 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index b60b2abd480c..ff3a6a336342 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -502,7 +502,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
  */
 static void fam10h_pci_cfg_space_size(struct pci_dev *dev)
 {
-	dev->cfg_size = pci_cfg_space_size_ext(dev, 0);
+	dev->cfg_size = pci_cfg_space_size_ext(dev);
 }
 
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 4a55bf380957..3706ce7972dd 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -842,13 +842,25 @@ static void set_pcie_port_type(struct pci_dev *pdev)
  * reading the dword at 0x100 which must either be 0 or a valid extended
  * capability header.
  */
-int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix)
+int pci_cfg_space_size_ext(struct pci_dev *dev)
 {
-	int pos;
 	u32 status;
 
-	if (!check_exp_pcix)
-		goto skip;
+	if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL)
+		goto fail;
+	if (status == 0xffffffff)
+		goto fail;
+
+	return PCI_CFG_SPACE_EXP_SIZE;
+
+ fail:
+	return PCI_CFG_SPACE_SIZE;
+}
+
+int pci_cfg_space_size(struct pci_dev *dev)
+{
+	int pos;
+	u32 status;
 
 	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
 	if (!pos) {
@@ -861,23 +873,12 @@ int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix)
 			goto fail;
 	}
 
- skip:
-	if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL)
-		goto fail;
-	if (status == 0xffffffff)
-		goto fail;
-
-	return PCI_CFG_SPACE_EXP_SIZE;
+	return pci_cfg_space_size_ext(dev);
 
  fail:
 	return PCI_CFG_SPACE_SIZE;
 }
 
-int pci_cfg_space_size(struct pci_dev *dev)
-{
-	return pci_cfg_space_size_ext(dev, 1);
-}
-
 static void pci_release_bus_bridge_dev(struct device *dev)
 {
 	kfree(dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index a59517b4930f..509159bcd4e7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -667,7 +667,7 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
 
 void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *),
 		  void *userdata);
-int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix);
+int pci_cfg_space_size_ext(struct pci_dev *dev);
 int pci_cfg_space_size(struct pci_dev *dev);
 unsigned char pci_bus_max_busnr(struct pci_bus *bus);
 
-- 
cgit v1.2.3


From b41e5fffe8b81fc939067d8c1c195cc79115d5a3 Mon Sep 17 00:00:00 2001
From: Emil Medve <Emilian.Medve@Freescale.com>
Date: Sat, 3 May 2008 06:34:04 +1000
Subject: [POWERPC] devres: Add devm_ioremap_prot()

We provide an ioremap_flags, so this provides a corresponding
devm_ioremap_prot.  The slight name difference is at Ben
Herrenschmidt's request as he plans on changing ioremap_flags to
ioremap_prot in the future.

Signed-off-by: Emil Medve <Emilian.Medve@Freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
Acked-by: Tejun Heo <htejun@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/lib/Makefile |  1 +
 arch/powerpc/lib/devres.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 include/asm-powerpc/io.h  |  8 +++++++-
 include/linux/io.h        |  1 +
 lib/devres.c              |  2 +-
 5 files changed, 52 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/lib/devres.c

(limited to 'include/linux')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 4bb023f4c869..f1d2cdc5331b 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_SMP)	+= locks.o
 endif
 
 obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
+obj-$(CONFIG_HAS_IOMEM)	+= devres.o
diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c
new file mode 100644
index 000000000000..292115d98ea9
--- /dev/null
+++ b/arch/powerpc/lib/devres.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/device.h>	/* devres_*(), devm_ioremap_release() */
+#include <linux/io.h>		/* ioremap_flags() */
+#include <linux/module.h>	/* EXPORT_SYMBOL() */
+
+/**
+ * devm_ioremap_prot - Managed ioremap_flags()
+ * @dev: Generic device to remap IO address for
+ * @offset: BUS offset to map
+ * @size: Size of map
+ * @flags: Page flags
+ *
+ * Managed ioremap_prot().  Map is automatically unmapped on driver
+ * detach.
+ */
+void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
+				 size_t size, unsigned long flags)
+{
+	void __iomem **ptr, *addr;
+
+	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	addr = ioremap_flags(offset, size, flags);
+	if (addr) {
+		*ptr = addr;
+		devres_add(dev, ptr);
+	} else
+		devres_free(ptr);
+
+	return addr;
+}
+EXPORT_SYMBOL(devm_ioremap_prot);
diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h
index afae0697e8ce..e0062d73db1c 100644
--- a/include/asm-powerpc/io.h
+++ b/include/asm-powerpc/io.h
@@ -2,7 +2,7 @@
 #define _ASM_POWERPC_IO_H
 #ifdef __KERNEL__
 
-/* 
+/*
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
@@ -18,6 +18,9 @@ extern int check_legacy_ioport(unsigned long base_port);
 #define _PNPWRP		0xa79
 #define PNPBIOS_BASE	0xf000
 
+#include <linux/device.h>
+#include <linux/io.h>
+
 #include <linux/compiler.h>
 #include <asm/page.h>
 #include <asm/byteorder.h>
@@ -744,6 +747,9 @@ static inline void * bus_to_virt(unsigned long address)
 
 #define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set)
 
+void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
+				size_t size, unsigned long flags);
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_POWERPC_IO_H */
diff --git a/include/linux/io.h b/include/linux/io.h
index 3a03a3604cce..6c7f0ba0d5fa 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -65,5 +65,6 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset,
 void devm_iounmap(struct device *dev, void __iomem *addr);
 int check_signature(const volatile void __iomem *io_addr,
 			const unsigned char *signature, int length);
+void devm_ioremap_release(struct device *dev, void *res);
 
 #endif /* _LINUX_IO_H */
diff --git a/lib/devres.c b/lib/devres.c
index 26c87c49d776..72c8909006da 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -2,7 +2,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 
-static void devm_ioremap_release(struct device *dev, void *res)
+void devm_ioremap_release(struct device *dev, void *res)
 {
 	iounmap(*(void __iomem **)res);
 }
-- 
cgit v1.2.3


From 688b744d8bc84dc5cc646e97509113dc5e8818ed Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Thu, 24 Apr 2008 16:57:23 -0500
Subject: kgdb: fix signedness mismatches, add statics, add declaration to
 header

Noticed by sparse:
arch/x86/kernel/kgdb.c:556:15: warning: symbol 'kgdb_arch_pc' was not declared. Should it be static?
kernel/kgdb.c:149:8: warning: symbol 'kgdb_do_roundup' was not declared. Should it be static?
kernel/kgdb.c:193:22: warning: symbol 'kgdb_arch_pc' was not declared. Should it be static?
kernel/kgdb.c:712:5: warning: symbol 'remove_all_break' was not declared. Should it be static?

Related to kgdb_hex2long:
arch/x86/kernel/kgdb.c:371:28: warning: incorrect type in argument 2 (different signedness)
arch/x86/kernel/kgdb.c:371:28:    expected long *long_val
arch/x86/kernel/kgdb.c:371:28:    got unsigned long *<noident>
kernel/kgdb.c:469:27: warning: incorrect type in argument 2 (different signedness)
kernel/kgdb.c:469:27:    expected long *long_val
kernel/kgdb.c:469:27:    got unsigned long *<noident>
kernel/kgdb.c:470:27: warning: incorrect type in argument 2 (different signedness)
kernel/kgdb.c:470:27:    expected long *long_val
kernel/kgdb.c:470:27:    got unsigned long *<noident>
kernel/kgdb.c:894:27: warning: incorrect type in argument 2 (different signedness)
kernel/kgdb.c:894:27:    expected long *long_val
kernel/kgdb.c:894:27:    got unsigned long *<noident>
kernel/kgdb.c:895:27: warning: incorrect type in argument 2 (different signedness)
kernel/kgdb.c:895:27:    expected long *long_val
kernel/kgdb.c:895:27:    got unsigned long *<noident>
kernel/kgdb.c:1127:28: warning: incorrect type in argument 2 (different signedness)
kernel/kgdb.c:1127:28:    expected long *long_val
kernel/kgdb.c:1127:28:    got unsigned long *<noident>
kernel/kgdb.c:1132:25: warning: incorrect type in argument 2 (different signedness)
kernel/kgdb.c:1132:25:    expected long *long_val
kernel/kgdb.c:1132:25:    got unsigned long *<noident>

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 include/linux/kgdb.h | 4 +++-
 kernel/kgdb.c        | 8 ++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 9757b1a6d9dc..6adcc297e354 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -261,10 +261,12 @@ struct kgdb_io {
 
 extern struct kgdb_arch		arch_kgdb_ops;
 
+extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs);
+
 extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops);
 extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops);
 
-extern int kgdb_hex2long(char **ptr, long *long_val);
+extern int kgdb_hex2long(char **ptr, unsigned long *long_val);
 extern int kgdb_mem2hex(char *mem, char *buf, int count);
 extern int kgdb_hex2mem(char *buf, char *mem, int count);
 
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 1bd0ec1c80b2..39e31a036f5b 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -61,7 +61,7 @@ struct kgdb_state {
 	int			err_code;
 	int			cpu;
 	int			pass_exception;
-	long			threadid;
+	unsigned long		threadid;
 	long			kgdb_usethreadid;
 	struct pt_regs		*linux_regs;
 };
@@ -146,7 +146,7 @@ atomic_t			kgdb_cpu_doing_single_step = ATOMIC_INIT(-1);
  * the other CPUs might interfere with your debugging context, so
  * use this with care:
  */
-int				kgdb_do_roundup = 1;
+static int kgdb_do_roundup = 1;
 
 static int __init opt_nokgdbroundup(char *str)
 {
@@ -438,7 +438,7 @@ int kgdb_hex2mem(char *buf, char *mem, int count)
  * While we find nice hex chars, build a long_val.
  * Return number of chars processed.
  */
-int kgdb_hex2long(char **ptr, long *long_val)
+int kgdb_hex2long(char **ptr, unsigned long *long_val)
 {
 	int hex_val;
 	int num = 0;
@@ -709,7 +709,7 @@ int kgdb_isremovedbreak(unsigned long addr)
 	return 0;
 }
 
-int remove_all_break(void)
+static int remove_all_break(void)
 {
 	unsigned long addr;
 	int error;
-- 
cgit v1.2.3


From 8ae121ac8666b0421aa20fd80d4597ec66fa54bc Mon Sep 17 00:00:00 2001
From: Gregory Haskins <ghaskins@novell.com>
Date: Wed, 23 Apr 2008 07:13:29 -0400
Subject: sched: fix RT task-wakeup logic

Dmitry Adamushko pointed out a logic error in task_wake_up_rt() where we
will always evaluate to "true".  You can find the thread here:

http://lkml.org/lkml/2008/4/22/296

In reality, we only want to try to push tasks away when a wake up request is
not going to preempt the current task.  So let's fix it.

Note: We introduce test_tsk_need_resched() instead of open-coding the flag
check so that the merge-conflict with -rt should help remind us that we
may need to support NEEDS_RESCHED_DELAYED in the future, too.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
CC: Dmitry Adamushko <dmitry.adamushko@gmail.com>
CC: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 7 ++++++-
 kernel/sched_rt.c     | 7 +++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 03c238088aee..698b5a4d25a7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1977,6 +1977,11 @@ static inline void clear_tsk_need_resched(struct task_struct *tsk)
 	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
 }
 
+static inline int test_tsk_need_resched(struct task_struct *tsk)
+{
+	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
+}
+
 static inline int signal_pending(struct task_struct *p)
 {
 	return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
@@ -1991,7 +1996,7 @@ static inline int fatal_signal_pending(struct task_struct *p)
 
 static inline int need_resched(void)
 {
-	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
+	return unlikely(test_tsk_need_resched(current));
 }
 
 /*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index dcd649588593..060e87b0cb1c 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1098,11 +1098,14 @@ static void post_schedule_rt(struct rq *rq)
 	}
 }
 
-
+/*
+ * If we are not running and we are not going to reschedule soon, we should
+ * try to push tasks away now
+ */
 static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
 {
 	if (!task_running(rq, p) &&
-	    (p->prio >= rq->rt.highest_prio) &&
+	    !test_tsk_need_resched(rq->curr) &&
 	    rq->rt.overloaded)
 		push_rt_tasks(rq);
 }
-- 
cgit v1.2.3


From 690229a0912ca2fef8b542fe4d8b73acfcdc6e24 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 23 Apr 2008 09:31:35 +0200
Subject: sched: make clock sync tunable by architecture code

Make time_sync_thresh tunable by architecture code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 2 ++
 kernel/sched.c        | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 698b5a4d25a7..54c9ca26b7d8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -158,6 +158,8 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 }
 #endif
 
+extern unsigned long long time_sync_thresh;
+
 /*
  * Task state bitmask. NOTE! These bits are also
  * encoded in fs/proc/array.c: get_task_state().
diff --git a/kernel/sched.c b/kernel/sched.c
index 3ac3d7af04a1..8f433fedfcb3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -899,7 +899,7 @@ static inline u64 global_rt_runtime(void)
 	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
 }
 
-static const unsigned long long time_sync_thresh = 100000;
+unsigned long long time_sync_thresh = 100000;
 
 static DEFINE_PER_CPU(unsigned long long, time_offset);
 static DEFINE_PER_CPU(unsigned long long, prev_cpu_time);
-- 
cgit v1.2.3


From 3e51f33fcc7f55e6df25d15b55ed10c8b4da84cd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Sat, 3 May 2008 18:29:28 +0200
Subject: sched: add optional support for CONFIG_HAVE_UNSTABLE_SCHED_CLOCK

this replaces the rq->clock stuff (and possibly cpu_clock()).

 - architectures that have an 'imperfect' hardware clock can set
   CONFIG_HAVE_UNSTABLE_SCHED_CLOCK

 - the 'jiffy' window might be superfluous when we update tick_gtod
   before the __update_sched_clock() call in sched_clock_tick()

 - cpu_clock() might be implemented as:

     sched_clock_cpu(smp_processor_id())

   if the accuracy proves good enough - how far can TSC drift in a
   single jiffy when considering the filtering and idle hooks?

[ mingo@elte.hu: various fixes and cleanups ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  29 +++++++
 init/main.c           |   1 +
 kernel/Makefile       |   2 +-
 kernel/sched.c        | 165 +++--------------------------------
 kernel/sched_clock.c  | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched_debug.c  |   7 --
 kernel/sched_fair.c   |   2 +-
 7 files changed, 281 insertions(+), 161 deletions(-)
 create mode 100644 kernel/sched_clock.c

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 54c9ca26b7d8..0c35b0343a76 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1553,6 +1553,35 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 
 extern unsigned long long sched_clock(void);
 
+#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+static inline void sched_clock_init(void)
+{
+}
+
+static inline u64 sched_clock_cpu(int cpu)
+{
+	return sched_clock();
+}
+
+static inline void sched_clock_tick(void)
+{
+}
+
+static inline void sched_clock_idle_sleep_event(void)
+{
+}
+
+static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+}
+#else
+extern void sched_clock_init(void);
+extern u64 sched_clock_cpu(int cpu);
+extern void sched_clock_tick(void);
+extern void sched_clock_idle_sleep_event(void);
+extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+#endif
+
 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
  * clock constructed from sched_clock():
diff --git a/init/main.c b/init/main.c
index a87d4ca5c36c..ddada7acf363 100644
--- a/init/main.c
+++ b/init/main.c
@@ -602,6 +602,7 @@ asmlinkage void __init start_kernel(void)
 	softirq_init();
 	timekeeping_init();
 	time_init();
+	sched_clock_init();
 	profile_init();
 	if (!irqs_disabled())
 		printk("start_kernel(): bug: interrupts were enabled early\n");
diff --git a/kernel/Makefile b/kernel/Makefile
index 188c43223f52..1c9938addb9d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
-	    notifier.o ksysfs.o pm_qos_params.o
+	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o
 
 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/kernel/sched.c b/kernel/sched.c
index 9457106b18af..58fb8af15776 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -74,16 +74,6 @@
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
 
-/*
- * Scheduler clock - returns current time in nanosec units.
- * This is default implementation.
- * Architectures and sub-architectures can override this.
- */
-unsigned long long __attribute__((weak)) sched_clock(void)
-{
-	return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
-}
-
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -557,13 +547,7 @@ struct rq {
 	unsigned long next_balance;
 	struct mm_struct *prev_mm;
 
-	u64 clock, prev_clock_raw;
-	s64 clock_max_delta;
-
-	unsigned int clock_warps, clock_overflows, clock_underflows;
-	u64 idle_clock;
-	unsigned int clock_deep_idle_events;
-	u64 tick_timestamp;
+	u64 clock;
 
 	atomic_t nr_iowait;
 
@@ -628,82 +612,6 @@ static inline int cpu_of(struct rq *rq)
 #endif
 }
 
-#ifdef CONFIG_NO_HZ
-static inline bool nohz_on(int cpu)
-{
-	return tick_get_tick_sched(cpu)->nohz_mode != NOHZ_MODE_INACTIVE;
-}
-
-static inline u64 max_skipped_ticks(struct rq *rq)
-{
-	return nohz_on(cpu_of(rq)) ? jiffies - rq->last_tick_seen + 2 : 1;
-}
-
-static inline void update_last_tick_seen(struct rq *rq)
-{
-	rq->last_tick_seen = jiffies;
-}
-#else
-static inline u64 max_skipped_ticks(struct rq *rq)
-{
-	return 1;
-}
-
-static inline void update_last_tick_seen(struct rq *rq)
-{
-}
-#endif
-
-/*
- * Update the per-runqueue clock, as finegrained as the platform can give
- * us, but without assuming monotonicity, etc.:
- */
-static void __update_rq_clock(struct rq *rq)
-{
-	u64 prev_raw = rq->prev_clock_raw;
-	u64 now = sched_clock();
-	s64 delta = now - prev_raw;
-	u64 clock = rq->clock;
-
-#ifdef CONFIG_SCHED_DEBUG
-	WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
-#endif
-	/*
-	 * Protect against sched_clock() occasionally going backwards:
-	 */
-	if (unlikely(delta < 0)) {
-		clock++;
-		rq->clock_warps++;
-	} else {
-		/*
-		 * Catch too large forward jumps too:
-		 */
-		u64 max_jump = max_skipped_ticks(rq) * TICK_NSEC;
-		u64 max_time = rq->tick_timestamp + max_jump;
-
-		if (unlikely(clock + delta > max_time)) {
-			if (clock < max_time)
-				clock = max_time;
-			else
-				clock++;
-			rq->clock_overflows++;
-		} else {
-			if (unlikely(delta > rq->clock_max_delta))
-				rq->clock_max_delta = delta;
-			clock += delta;
-		}
-	}
-
-	rq->prev_clock_raw = now;
-	rq->clock = clock;
-}
-
-static void update_rq_clock(struct rq *rq)
-{
-	if (likely(smp_processor_id() == cpu_of(rq)))
-		__update_rq_clock(rq);
-}
-
 /*
  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
  * See detach_destroy_domains: synchronize_sched for details.
@@ -719,6 +627,11 @@ static void update_rq_clock(struct rq *rq)
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
+static inline void update_rq_clock(struct rq *rq)
+{
+	rq->clock = sched_clock_cpu(cpu_of(rq));
+}
+
 /*
  * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
  */
@@ -935,7 +848,6 @@ static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu)
 static unsigned long long __cpu_clock(int cpu)
 {
 	unsigned long long now;
-	struct rq *rq;
 
 	/*
 	 * Only call sched_clock() if the scheduler has already been
@@ -944,9 +856,7 @@ static unsigned long long __cpu_clock(int cpu)
 	if (unlikely(!scheduler_running))
 		return 0;
 
-	rq = cpu_rq(cpu);
-	update_rq_clock(rq);
-	now = rq->clock;
+	now = sched_clock_cpu(cpu);
 
 	return now;
 }
@@ -1120,45 +1030,6 @@ static struct rq *this_rq_lock(void)
 	return rq;
 }
 
-/*
- * We are going deep-idle (irqs are disabled):
- */
-void sched_clock_idle_sleep_event(void)
-{
-	struct rq *rq = cpu_rq(smp_processor_id());
-
-	WARN_ON(!irqs_disabled());
-	spin_lock(&rq->lock);
-	__update_rq_clock(rq);
-	spin_unlock(&rq->lock);
-	rq->clock_deep_idle_events++;
-}
-EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
-
-/*
- * We just idled delta nanoseconds (called with irqs disabled):
- */
-void sched_clock_idle_wakeup_event(u64 delta_ns)
-{
-	struct rq *rq = cpu_rq(smp_processor_id());
-	u64 now = sched_clock();
-
-	WARN_ON(!irqs_disabled());
-	rq->idle_clock += delta_ns;
-	/*
-	 * Override the previous timestamp and ignore all
-	 * sched_clock() deltas that occured while we idled,
-	 * and use the PM-provided delta_ns to advance the
-	 * rq clock:
-	 */
-	spin_lock(&rq->lock);
-	rq->prev_clock_raw = now;
-	rq->clock += delta_ns;
-	spin_unlock(&rq->lock);
-	touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
-
 static void __resched_task(struct task_struct *p, int tif_bit);
 
 static inline void resched_task(struct task_struct *p)
@@ -1283,7 +1154,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 	WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
 
 	spin_lock(&rq->lock);
-	__update_rq_clock(rq);
+	update_rq_clock(rq);
 	rq->curr->sched_class->task_tick(rq, rq->curr, 1);
 	spin_unlock(&rq->lock);
 
@@ -4476,19 +4347,11 @@ void scheduler_tick(void)
 	int cpu = smp_processor_id();
 	struct rq *rq = cpu_rq(cpu);
 	struct task_struct *curr = rq->curr;
-	u64 next_tick = rq->tick_timestamp + TICK_NSEC;
+
+	sched_clock_tick();
 
 	spin_lock(&rq->lock);
-	__update_rq_clock(rq);
-	/*
-	 * Let rq->clock advance by at least TICK_NSEC:
-	 */
-	if (unlikely(rq->clock < next_tick)) {
-		rq->clock = next_tick;
-		rq->clock_underflows++;
-	}
-	rq->tick_timestamp = rq->clock;
-	update_last_tick_seen(rq);
+	update_rq_clock(rq);
 	update_cpu_load(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
 	spin_unlock(&rq->lock);
@@ -4642,7 +4505,7 @@ need_resched_nonpreemptible:
 	 * Do the rq-clock update outside the rq lock:
 	 */
 	local_irq_disable();
-	__update_rq_clock(rq);
+	update_rq_clock(rq);
 	spin_lock(&rq->lock);
 	clear_tsk_need_resched(prev);
 
@@ -8226,8 +8089,6 @@ void __init sched_init(void)
 		spin_lock_init(&rq->lock);
 		lockdep_set_class(&rq->lock, &rq->rq_lock_key);
 		rq->nr_running = 0;
-		rq->clock = 1;
-		update_last_tick_seen(rq);
 		init_cfs_rq(&rq->cfs, rq);
 		init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -8371,6 +8232,7 @@ EXPORT_SYMBOL(__might_sleep);
 static void normalize_task(struct rq *rq, struct task_struct *p)
 {
 	int on_rq;
+
 	update_rq_clock(rq);
 	on_rq = p->se.on_rq;
 	if (on_rq)
@@ -8402,7 +8264,6 @@ void normalize_rt_tasks(void)
 		p->se.sleep_start		= 0;
 		p->se.block_start		= 0;
 #endif
-		task_rq(p)->clock		= 0;
 
 		if (!rt_task(p)) {
 			/*
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
new file mode 100644
index 000000000000..9c597e37f7de
--- /dev/null
+++ b/kernel/sched_clock.c
@@ -0,0 +1,236 @@
+/*
+ * sched_clock for unstable cpu clocks
+ *
+ *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Based on code by:
+ *   Ingo Molnar <mingo@redhat.com>
+ *   Guillaume Chazarain <guichaz@gmail.com>
+ *
+ * Create a semi stable clock from a mixture of other events, including:
+ *  - gtod
+ *  - jiffies
+ *  - sched_clock()
+ *  - explicit idle events
+ *
+ * We use gtod as base and the unstable clock deltas. The deltas are filtered,
+ * making it monotonic and keeping it within an expected window.  This window
+ * is set up using jiffies.
+ *
+ * Furthermore, explicit sleep and wakeup hooks allow us to account for time
+ * that is otherwise invisible (TSC gets stopped).
+ *
+ * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
+ * consistent between cpus (never more than 1 jiffies difference).
+ */
+#include <linux/sched.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+#include <linux/ktime.h>
+#include <linux/module.h>
+
+
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+
+struct sched_clock_data {
+	/*
+	 * Raw spinlock - this is a special case: this might be called
+	 * from within instrumentation code so we dont want to do any
+	 * instrumentation ourselves.
+	 */
+	raw_spinlock_t		lock;
+
+	unsigned long		prev_jiffies;
+	u64			prev_raw;
+	u64			tick_raw;
+	u64			tick_gtod;
+	u64			clock;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
+
+static inline struct sched_clock_data *this_scd(void)
+{
+	return &__get_cpu_var(sched_clock_data);
+}
+
+static inline struct sched_clock_data *cpu_sdc(int cpu)
+{
+	return &per_cpu(sched_clock_data, cpu);
+}
+
+void sched_clock_init(void)
+{
+	u64 ktime_now = ktime_to_ns(ktime_get());
+	u64 now = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct sched_clock_data *scd = cpu_sdc(cpu);
+
+		scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+		scd->prev_jiffies = jiffies;
+		scd->prev_raw = now;
+		scd->tick_raw = now;
+		scd->tick_gtod = ktime_now;
+		scd->clock = ktime_now;
+	}
+}
+
+/*
+ * update the percpu scd from the raw @now value
+ *
+ *  - filter out backward motion
+ *  - use jiffies to generate a min,max window to clip the raw values
+ */
+static void __update_sched_clock(struct sched_clock_data *scd, u64 now)
+{
+	unsigned long now_jiffies = jiffies;
+	long delta_jiffies = now_jiffies - scd->prev_jiffies;
+	u64 clock = scd->clock;
+	u64 min_clock, max_clock;
+	s64 delta = now - scd->prev_raw;
+
+	WARN_ON_ONCE(!irqs_disabled());
+	min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
+
+	if (unlikely(delta < 0)) {
+		clock++;
+		goto out;
+	}
+
+	max_clock = min_clock + TICK_NSEC;
+
+	if (unlikely(clock + delta > max_clock)) {
+		if (clock < max_clock)
+			clock = max_clock;
+		else
+			clock++;
+	} else {
+		clock += delta;
+	}
+
+ out:
+	if (unlikely(clock < min_clock))
+		clock = min_clock;
+
+	scd->prev_raw = now;
+	scd->prev_jiffies = now_jiffies;
+	scd->clock = clock;
+}
+
+static void lock_double_clock(struct sched_clock_data *data1,
+				struct sched_clock_data *data2)
+{
+	if (data1 < data2) {
+		__raw_spin_lock(&data1->lock);
+		__raw_spin_lock(&data2->lock);
+	} else {
+		__raw_spin_lock(&data2->lock);
+		__raw_spin_lock(&data1->lock);
+	}
+}
+
+u64 sched_clock_cpu(int cpu)
+{
+	struct sched_clock_data *scd = cpu_sdc(cpu);
+	u64 now, clock;
+
+	WARN_ON_ONCE(!irqs_disabled());
+	now = sched_clock();
+
+	if (cpu != raw_smp_processor_id()) {
+		/*
+		 * in order to update a remote cpu's clock based on our
+		 * unstable raw time rebase it against:
+		 *   tick_raw		(offset between raw counters)
+		 *   tick_gotd          (tick offset between cpus)
+		 */
+		struct sched_clock_data *my_scd = this_scd();
+
+		lock_double_clock(scd, my_scd);
+
+		now -= my_scd->tick_raw;
+		now += scd->tick_raw;
+
+		now -= my_scd->tick_gtod;
+		now += scd->tick_gtod;
+
+		__raw_spin_unlock(&my_scd->lock);
+	} else {
+		__raw_spin_lock(&scd->lock);
+	}
+
+	__update_sched_clock(scd, now);
+	clock = scd->clock;
+
+	__raw_spin_unlock(&scd->lock);
+
+	return clock;
+}
+
+void sched_clock_tick(void)
+{
+	struct sched_clock_data *scd = this_scd();
+	u64 now, now_gtod;
+
+	WARN_ON_ONCE(!irqs_disabled());
+
+	now = sched_clock();
+	now_gtod = ktime_to_ns(ktime_get());
+
+	__raw_spin_lock(&scd->lock);
+	__update_sched_clock(scd, now);
+	/*
+	 * update tick_gtod after __update_sched_clock() because that will
+	 * already observe 1 new jiffy; adding a new tick_gtod to that would
+	 * increase the clock 2 jiffies.
+	 */
+	scd->tick_raw = now;
+	scd->tick_gtod = now_gtod;
+	__raw_spin_unlock(&scd->lock);
+}
+
+/*
+ * We are going deep-idle (irqs are disabled):
+ */
+void sched_clock_idle_sleep_event(void)
+{
+	sched_clock_cpu(smp_processor_id());
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
+
+/*
+ * We just idled delta nanoseconds (called with irqs disabled):
+ */
+void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+	struct sched_clock_data *scd = this_scd();
+	u64 now = sched_clock();
+
+	/*
+	 * Override the previous timestamp and ignore all
+	 * sched_clock() deltas that occured while we idled,
+	 * and use the PM-provided delta_ns to advance the
+	 * rq clock:
+	 */
+	__raw_spin_lock(&scd->lock);
+	scd->prev_raw = now;
+	scd->clock += delta_ns;
+	__raw_spin_unlock(&scd->lock);
+
+	touch_softlockup_watchdog();
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
+
+#endif
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ * This is default implementation.
+ * Architectures and sub-architectures can override this.
+ */
+unsigned long long __attribute__((weak)) sched_clock(void)
+{
+	return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
+}
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 6b4a12558e88..5f06118fbc31 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -204,13 +204,6 @@ static void print_cpu(struct seq_file *m, int cpu)
 	PN(next_balance);
 	P(curr->pid);
 	PN(clock);
-	PN(idle_clock);
-	PN(prev_clock_raw);
-	P(clock_warps);
-	P(clock_overflows);
-	P(clock_underflows);
-	P(clock_deep_idle_events);
-	PN(clock_max_delta);
 	P(cpu_load[0]);
 	P(cpu_load[1]);
 	P(cpu_load[2]);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index d99e01f6929a..c863663d204d 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -959,7 +959,7 @@ static void yield_task_fair(struct rq *rq)
 		return;
 
 	if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
-		__update_rq_clock(rq);
+		update_rq_clock(rq);
 		/*
 		 * Update run-time statistics of the 'current'.
 		 */
-- 
cgit v1.2.3


From 78ab88f04f44bed566d51dce0c7cbfeff6449a06 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Thu, 1 May 2008 23:41:41 +0900
Subject: libata: improve post-reset device ready test

Some controllers (jmb and inic162x) use 0x77 and 0x7f to indicate that
the device isn't ready yet.  It looks like they use 0xff if device
presence is detected but the connection isn't established, 0x77 or 0x7f
after the connection is established, and the value from the signature
FIS after receiving it.

This patch implements ata_check_ready(), which takes the TF status value
and determines whether the port is ready or not considering the above
and other conditions, and uses it in @check_ready() functions.  This is
safe as both 0x77 and 0x7f aren't valid ready status values even though
they have the BSY bit cleared.

This fixes hot plug detection failures which can be triggered with
certain drives if they aren't already spun up when the data connector
is hot plugged.

Tested on sil, sil24, ahci (jmb/ich), piix and inic162x combined with
eight drives from all major vendors.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/ahci.c       |  4 +---
 drivers/ata/libata-sff.c |  6 +-----
 include/linux/libata.h   | 15 +++++++++++++++
 3 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 8cace9aa9c03..97f83fb2ee2e 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1267,9 +1267,7 @@ static int ahci_check_ready(struct ata_link *link)
 	void __iomem *port_mmio = ahci_port_base(link->ap);
 	u8 status = readl(port_mmio + PORT_TFDATA) & 0xFF;
 
-	if (!(status & ATA_BUSY))
-		return 1;
-	return 0;
+	return ata_check_ready(status);
 }
 
 static int ahci_softreset(struct ata_link *link, unsigned int *class,
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 2ec65a8fda79..3c2d2289f85e 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -314,11 +314,7 @@ static int ata_sff_check_ready(struct ata_link *link)
 {
 	u8 status = link->ap->ops->sff_check_status(link->ap);
 
-	if (!(status & ATA_BUSY))
-		return 1;
-	if (status == 0xff)
-		return -ENODEV;
-	return 0;
+	return ata_check_ready(status);
 }
 
 /**
diff --git a/include/linux/libata.h b/include/linux/libata.h
index d1dfe872ee30..95e6159b44cf 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1381,6 +1381,21 @@ static inline struct ata_port *ata_shost_to_port(struct Scsi_Host *host)
 	return *(struct ata_port **)&host->hostdata[0];
 }
 
+static inline int ata_check_ready(u8 status)
+{
+	/* Some controllers report 0x77 or 0x7f during intermediate
+	 * not-ready stages.
+	 */
+	if (status == 0x77 || status == 0x7f)
+		return 0;
+
+	/* 0xff indicates either no device or device not ready */
+	if (status == 0xff)
+		return -ENODEV;
+
+	return !(status & ATA_BUSY);
+}
+
 
 /**************************************************************************
  * PMP - drivers/ata/libata-pmp.c
-- 
cgit v1.2.3


From 10acf3b0d3b46c6ef5d6f0722f72ad9b743ea848 Mon Sep 17 00:00:00 2001
From: Mark Lord <liml@rtr.ca>
Date: Fri, 2 May 2008 02:14:53 -0400
Subject: libata: export ata_eh_analyze_ncq_error

Export ata_eh_analyze_ncq_error() for subsequent use by sata_mv,
as suggested by Tejun.

Signed-off-by: Mark Lord <mlord@pobox.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 1 +
 drivers/ata/libata-eh.c   | 2 +-
 include/linux/libata.h    | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 3bc488538204..927b692d723c 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -6292,6 +6292,7 @@ EXPORT_SYMBOL_GPL(ata_eh_freeze_port);
 EXPORT_SYMBOL_GPL(ata_eh_thaw_port);
 EXPORT_SYMBOL_GPL(ata_eh_qc_complete);
 EXPORT_SYMBOL_GPL(ata_eh_qc_retry);
+EXPORT_SYMBOL_GPL(ata_eh_analyze_ncq_error);
 EXPORT_SYMBOL_GPL(ata_do_eh);
 EXPORT_SYMBOL_GPL(ata_std_error_handler);
 
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 61dcd0026c64..62e033146bed 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -1357,7 +1357,7 @@ static void ata_eh_analyze_serror(struct ata_link *link)
  *	LOCKING:
  *	Kernel thread context (may sleep).
  */
-static void ata_eh_analyze_ncq_error(struct ata_link *link)
+void ata_eh_analyze_ncq_error(struct ata_link *link)
 {
 	struct ata_port *ap = link->ap;
 	struct ata_eh_context *ehc = &link->eh_context;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 95e6159b44cf..7e206da1fbfb 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1039,6 +1039,7 @@ extern void ata_eh_thaw_port(struct ata_port *ap);
 
 extern void ata_eh_qc_complete(struct ata_queued_cmd *qc);
 extern void ata_eh_qc_retry(struct ata_queued_cmd *qc);
+extern void ata_eh_analyze_ncq_error(struct ata_link *link);
 
 extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
 		      ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
-- 
cgit v1.2.3


From 9b9a8bfc8dfbe09dc57f274e32e8b06151abbad7 Mon Sep 17 00:00:00 2001
From: Andy Fleming <afleming@freescale.com>
Date: Fri, 2 May 2008 13:00:51 -0500
Subject: phylib: Fix some sparse warnings

Declared some things static, declared some things in the header.

Signed-off-by: Andy Fleming <afleming@freescale.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/net/phy/phy.c | 2 +-
 include/linux/phy.h   | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 3c18bb594957..45cc2914d347 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -547,7 +547,7 @@ static void phy_force_reduction(struct phy_device *phydev)
  * Must not be called from interrupt context, or while the
  * phydev->lock is held.
  */
-void phy_error(struct phy_device *phydev)
+static void phy_error(struct phy_device *phydev)
 {
 	mutex_lock(&phydev->lock);
 	phydev->state = PHY_HALTED;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 02df20f085fe..7224c4099a28 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -412,6 +412,8 @@ int mdiobus_register(struct mii_bus *bus);
 void mdiobus_unregister(struct mii_bus *bus);
 void phy_sanitize_settings(struct phy_device *phydev);
 int phy_stop_interrupts(struct phy_device *phydev);
+int phy_enable_interrupts(struct phy_device *phydev);
+int phy_disable_interrupts(struct phy_device *phydev);
 
 static inline int phy_read_status(struct phy_device *phydev) {
 	return phydev->drv->read_status(phydev);
@@ -447,5 +449,8 @@ int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
 		int (*run)(struct phy_device *));
 int phy_scan_fixups(struct phy_device *phydev);
 
+int __init mdio_bus_init(void);
+void mdio_bus_exit(void);
+
 extern struct bus_type mdio_bus_type;
 #endif /* __PHY_H */
-- 
cgit v1.2.3


From 33dcdac2df54e66c447ae03f58c95c7251aa5649 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 29 Apr 2008 17:46:26 +0200
Subject: [PATCH] kill ->put_inode

And with that last patch to affs killing the last put_inode instance we
can finally, after many years of transition, kill this racy and awkward
interface.

(It's kinda funny that even the description in
Documentation/filesystems/vfs.txt was entirely wrong..)

Also remove a very misleading comment above the definition of
struct super_operations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking | 2 --
 Documentation/filesystems/vfs.txt | 4 ----
 fs/inode.c                        | 3 ---
 include/linux/fs.h                | 5 -----
 4 files changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index c2992bc54f2f..8b22d7d8b991 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -92,7 +92,6 @@ prototypes:
 	void (*destroy_inode)(struct inode *);
 	void (*dirty_inode) (struct inode *);
 	int (*write_inode) (struct inode *, int);
-	void (*put_inode) (struct inode *);
 	void (*drop_inode) (struct inode *);
 	void (*delete_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
@@ -115,7 +114,6 @@ alloc_inode:		no	no	no
 destroy_inode:		no
 dirty_inode:		no				(must not sleep)
 write_inode:		no
-put_inode:		no
 drop_inode:		no				!!!inode_lock!!!
 delete_inode:		no
 put_super:		yes	yes	no
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 81e5be6e6e35..b7522c6cbae3 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -205,7 +205,6 @@ struct super_operations {
 
         void (*dirty_inode) (struct inode *);
         int (*write_inode) (struct inode *, int);
-        void (*put_inode) (struct inode *);
         void (*drop_inode) (struct inode *);
         void (*delete_inode) (struct inode *);
         void (*put_super) (struct super_block *);
@@ -246,9 +245,6 @@ or bottom half).
 	inode to disc.  The second parameter indicates whether the write
 	should be synchronous or not, not all filesystems check this flag.
 
-  put_inode: called when the VFS inode is removed from the inode
-	cache.
-
   drop_inode: called when the last access to the inode is dropped,
 	with the inode_lock spinlock held.
 
diff --git a/fs/inode.c b/fs/inode.c
index bf6478130424..18bdce14b70c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1153,9 +1153,6 @@ void iput(struct inode *inode)
 
 		BUG_ON(inode->i_state == I_CLEAR);
 
-		if (op && op->put_inode)
-			op->put_inode(inode);
-
 		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
 			iput_final(inode);
 	}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a1ba005d08e7..7e0fa9e64479 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1289,17 +1289,12 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
 extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
 		unsigned long, loff_t *);
 
-/*
- * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called
- * without the big kernel lock held in all filesystems.
- */
 struct super_operations {
    	struct inode *(*alloc_inode)(struct super_block *sb);
 	void (*destroy_inode)(struct inode *);
 
    	void (*dirty_inode) (struct inode *);
 	int (*write_inode) (struct inode *, int);
-	void (*put_inode) (struct inode *);
 	void (*drop_inode) (struct inode *);
 	void (*delete_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
-- 
cgit v1.2.3


From 7f3d4ee108c184ab215036051087aaaaa8de7661 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 7 May 2008 09:22:39 +0200
Subject: vfs: splice remove_suid() cleanup

generic_file_splice_write() duplicates remove_suid() just because it
doesn't hold i_mutex.  But it grabs i_mutex inside splice_from_pipe()
anyway, so this is rather pointless.

Move locking to generic_file_splice_write() and call remove_suid() and
__splice_from_pipe() instead.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/splice.c        | 29 +++++++++++++----------------
 include/linux/fs.h |  1 -
 mm/filemap.c       |  2 +-
 3 files changed, 14 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/fs/splice.c b/fs/splice.c
index 633f58ebfb72..cece15b4ef72 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -811,24 +811,19 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 {
 	struct address_space *mapping = out->f_mapping;
 	struct inode *inode = mapping->host;
-	int killsuid, killpriv;
+	struct splice_desc sd = {
+		.total_len = len,
+		.flags = flags,
+		.pos = *ppos,
+		.u.file = out,
+	};
 	ssize_t ret;
-	int err = 0;
-
-	killpriv = security_inode_need_killpriv(out->f_path.dentry);
-	killsuid = should_remove_suid(out->f_path.dentry);
-	if (unlikely(killsuid || killpriv)) {
-		mutex_lock(&inode->i_mutex);
-		if (killpriv)
-			err = security_inode_killpriv(out->f_path.dentry);
-		if (!err && killsuid)
-			err = __remove_suid(out->f_path.dentry, killsuid);
-		mutex_unlock(&inode->i_mutex);
-		if (err)
-			return err;
-	}
 
-	ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
+	inode_double_lock(inode, pipe->inode);
+	ret = remove_suid(out->f_path.dentry);
+	if (likely(!ret))
+		ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
+	inode_double_unlock(inode, pipe->inode);
 	if (ret > 0) {
 		unsigned long nr_pages;
 
@@ -840,6 +835,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 		 * sync it.
 		 */
 		if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
+			int err;
+
 			mutex_lock(&inode->i_mutex);
 			err = generic_osync_inode(inode, mapping,
 						  OSYNC_METADATA|OSYNC_DATA);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7e0fa9e64479..f413085f748e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1816,7 +1816,6 @@ extern void iget_failed(struct inode *);
 extern void clear_inode(struct inode *);
 extern void destroy_inode(struct inode *);
 extern struct inode *new_inode(struct super_block *);
-extern int __remove_suid(struct dentry *, int);
 extern int should_remove_suid(struct dentry *);
 extern int remove_suid(struct dentry *);
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 239d36163bbe..2dead9adf8b7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1655,7 +1655,7 @@ int should_remove_suid(struct dentry *dentry)
 }
 EXPORT_SYMBOL(should_remove_suid);
 
-int __remove_suid(struct dentry *dentry, int kill)
+static int __remove_suid(struct dentry *dentry, int kill)
 {
 	struct iattr newattrs;
 
-- 
cgit v1.2.3


From 221e583a735fc5d879d83c2a76b8ee5afcbdf146 Mon Sep 17 00:00:00 2001
From: Rasmus Rohde <rohde@duff.dk>
Date: Wed, 30 Apr 2008 17:22:06 +0200
Subject: udf: Make udf exportable

Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Rasmus Rohde <rohde@duff.dk>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/namei.c           | 140 ++++++++++++++++++++++++++++++++++++++++++++++-
 fs/udf/super.c           |   1 +
 fs/udf/udfdecl.h         |   1 +
 include/linux/exportfs.h |  21 +++++++
 4 files changed, 161 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index ba5537d4bc15..47a6589e10b5 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,7 @@
 #include <linux/buffer_head.h>
 #include <linux/sched.h>
 #include <linux/crc-itu-t.h>
+#include <linux/exportfs.h>
 
 static inline int udf_match(int len1, const char *name1, int len2,
 			    const char *name2)
@@ -158,6 +159,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
 	sector_t offset;
 	struct extent_position epos = {};
 	struct udf_inode_info *dinfo = UDF_I(dir);
+	int isdotdot = dentry->d_name.len == 2 &&
+		dentry->d_name.name[0] == '.' && dentry->d_name.name[1] == '.';
 
 	size = udf_ext0_offset(dir) + dir->i_size;
 	f_pos = udf_ext0_offset(dir);
@@ -225,6 +228,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
 				continue;
 		}
 
+		if ((cfi->fileCharacteristics & FID_FILE_CHAR_PARENT) &&
+		    isdotdot) {
+			brelse(epos.bh);
+			return fi;
+		}
+
 		if (!lfi)
 			continue;
 
@@ -286,9 +295,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
 		}
 	}
 	unlock_kernel();
-	d_add(dentry, inode);
 
-	return NULL;
+	return d_splice_alias(inode, dentry);
 }
 
 static struct fileIdentDesc *udf_add_entry(struct inode *dir,
@@ -1232,6 +1240,134 @@ end_rename:
 	return retval;
 }
 
+static struct dentry *udf_get_parent(struct dentry *child)
+{
+	struct dentry *parent;
+	struct inode *inode = NULL;
+	struct dentry dotdot;
+	struct fileIdentDesc cfi;
+	struct udf_fileident_bh fibh;
+
+	dotdot.d_name.name = "..";
+	dotdot.d_name.len = 2;
+
+	lock_kernel();
+	if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi))
+		goto out_unlock;
+
+	if (fibh.sbh != fibh.ebh)
+		brelse(fibh.ebh);
+	brelse(fibh.sbh);
+
+	inode = udf_iget(child->d_inode->i_sb,
+			 lelb_to_cpu(cfi.icb.extLocation));
+	if (!inode)
+		goto out_unlock;
+	unlock_kernel();
+
+	parent = d_alloc_anon(inode);
+	if (!parent) {
+		iput(inode);
+		parent = ERR_PTR(-ENOMEM);
+	}
+
+	return parent;
+out_unlock:
+	unlock_kernel();
+	return ERR_PTR(-EACCES);
+}
+
+
+static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
+					u16 partref, __u32 generation)
+{
+	struct inode *inode;
+	struct dentry *result;
+	kernel_lb_addr loc;
+
+	if (block == 0)
+		return ERR_PTR(-ESTALE);
+
+	loc.logicalBlockNum = block;
+	loc.partitionReferenceNum = partref;
+	inode = udf_iget(sb, loc);
+
+	if (inode == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	if (generation && inode->i_generation != generation) {
+		iput(inode);
+		return ERR_PTR(-ESTALE);
+	}
+	result = d_alloc_anon(inode);
+	if (!result) {
+		iput(inode);
+		return ERR_PTR(-ENOMEM);
+	}
+	return result;
+}
+
+static struct dentry *udf_fh_to_dentry(struct super_block *sb,
+				       struct fid *fid, int fh_len, int fh_type)
+{
+	if ((fh_len != 3 && fh_len != 5) ||
+	    (fh_type != FILEID_UDF_WITH_PARENT &&
+	     fh_type != FILEID_UDF_WITHOUT_PARENT))
+		return NULL;
+
+	return udf_nfs_get_inode(sb, fid->udf.block, fid->udf.partref,
+			fid->udf.generation);
+}
+
+static struct dentry *udf_fh_to_parent(struct super_block *sb,
+				       struct fid *fid, int fh_len, int fh_type)
+{
+	if (fh_len != 5 || fh_type != FILEID_UDF_WITH_PARENT)
+		return NULL;
+
+	return udf_nfs_get_inode(sb, fid->udf.parent_block,
+				 fid->udf.parent_partref,
+				 fid->udf.parent_generation);
+}
+static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
+			 int connectable)
+{
+	int len = *lenp;
+	struct inode *inode =  de->d_inode;
+	kernel_lb_addr location = UDF_I(inode)->i_location;
+	struct fid *fid = (struct fid *)fh;
+	int type = FILEID_UDF_WITHOUT_PARENT;
+
+	if (len < 3 || (connectable && len < 5))
+		return 255;
+
+	*lenp = 3;
+	fid->udf.block = location.logicalBlockNum;
+	fid->udf.partref = location.partitionReferenceNum;
+	fid->udf.generation = inode->i_generation;
+
+	if (connectable && !S_ISDIR(inode->i_mode)) {
+		spin_lock(&de->d_lock);
+		inode = de->d_parent->d_inode;
+		location = UDF_I(inode)->i_location;
+		fid->udf.parent_block = location.logicalBlockNum;
+		fid->udf.parent_partref = location.partitionReferenceNum;
+		fid->udf.parent_generation = inode->i_generation;
+		spin_unlock(&de->d_lock);
+		*lenp = 5;
+		type = FILEID_UDF_WITH_PARENT;
+	}
+
+	return type;
+}
+
+const struct export_operations udf_export_ops = {
+	.encode_fh	= udf_encode_fh,
+	.fh_to_dentry   = udf_fh_to_dentry,
+	.fh_to_parent   = udf_fh_to_parent,
+	.get_parent     = udf_get_parent,
+};
+
 const struct inode_operations udf_dir_inode_operations = {
 	.lookup				= udf_lookup,
 	.create				= udf_create,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index b564fc140fe4..260f4b82c799 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1933,6 +1933,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 
 	/* Fill in the rest of the superblock */
 	sb->s_op = &udf_sb_ops;
+	sb->s_export_op = &udf_export_ops;
 	sb->dq_op = NULL;
 	sb->s_dirt = 0;
 	sb->s_magic = UDF_SUPER_MAGIC;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index f3f45d029277..8fa9c2d70911 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -73,6 +73,7 @@ struct task_struct;
 struct buffer_head;
 struct super_block;
 
+extern const struct export_operations udf_export_ops;
 extern const struct inode_operations udf_dir_inode_operations;
 extern const struct file_operations udf_dir_operations;
 extern const struct inode_operations udf_file_inode_operations;
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index de8387b7ceb6..f5abd1306638 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -33,6 +33,19 @@ enum fid_type {
 	 * 32 bit parent directory inode number.
 	 */
 	FILEID_INO32_GEN_PARENT = 2,
+
+	/*
+	 * 32 bit block number, 16 bit partition reference,
+	 * 16 bit unused, 32 bit generation number.
+	 */
+	FILEID_UDF_WITHOUT_PARENT = 0x51,
+
+	/*
+	 * 32 bit block number, 16 bit partition reference,
+	 * 16 bit unused, 32 bit generation number,
+	 * 32 bit parent block number, 32 bit parent generation number
+	 */
+	FILEID_UDF_WITH_PARENT = 0x52,
 };
 
 struct fid {
@@ -43,6 +56,14 @@ struct fid {
 			u32 parent_ino;
 			u32 parent_gen;
 		} i32;
+ 		struct {
+ 			u32 block;
+ 			u16 partref;
+ 			u16 parent_partref;
+ 			u32 generation;
+ 			u32 parent_block;
+ 			u32 parent_generation;
+ 		} udf;
 		__u32 raw[0];
 	};
 };
-- 
cgit v1.2.3


From 6d63c275572d1e6f00d4fa154f16fbb0d8c2d2bf Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 7 May 2008 09:51:23 +0200
Subject: cfq-iosched: make io priorities inherit CPU scheduling class as well
 as nice

We currently set all processes to the best-effort scheduling class,
regardless of what CPU scheduling class they belong to. Improve that
so that we correctly track idle and rt scheduling classes as well.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/cfq-iosched.c    |  4 ++--
 include/linux/ioprio.h | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 7f909d2f4886..b399c62936e0 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1303,10 +1303,10 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
 		printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
 	case IOPRIO_CLASS_NONE:
 		/*
-		 * no prio set, place us in the middle of the BE classes
+		 * no prio set, inherit CPU scheduling settings
 		 */
 		cfqq->ioprio = task_nice_ioprio(tsk);
-		cfqq->ioprio_class = IOPRIO_CLASS_BE;
+		cfqq->ioprio_class = task_nice_ioclass(tsk);
 		break;
 	case IOPRIO_CLASS_RT:
 		cfqq->ioprio = task_ioprio(ioc);
diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
index 2a3bb1bb7433..f98a656b17e5 100644
--- a/include/linux/ioprio.h
+++ b/include/linux/ioprio.h
@@ -67,6 +67,20 @@ static inline int task_nice_ioprio(struct task_struct *task)
 	return (task_nice(task) + 20) / 5;
 }
 
+/*
+ * This is for the case where the task hasn't asked for a specific IO class.
+ * Check for idle and rt task process, and return appropriate IO class.
+ */
+static inline int task_nice_ioclass(struct task_struct *task)
+{
+	if (task->policy == SCHED_IDLE)
+		return IOPRIO_CLASS_IDLE;
+	else if (task->policy == SCHED_FIFO || task->policy == SCHED_RR)
+		return IOPRIO_CLASS_RT;
+	else
+		return IOPRIO_CLASS_BE;
+}
+
 /*
  * For inheritance, return the highest of the two given priorities
  */
-- 
cgit v1.2.3


From 28f13702f03e527fcb979747a882cf366c489c50 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 7 May 2008 10:15:46 +0200
Subject: block: avoid duplicate calls to get_part() in disk stat code

get_part() is fairly expensive, as it O(N) loops over partitions
to find the right one. In lots of normal IO paths we end up looking
up the partition twice, to make matters even worse. Change the
stat add code to accept a passed in partition instead.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-core.c           | 18 ++++++++++--------
 drivers/block/aoe/aoecmd.c | 10 ++++++----
 include/linux/genhd.h      | 35 ++++++++++++++++++-----------------
 3 files changed, 34 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 1b7dddf94f4f..2987fe47b5ee 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -54,15 +54,16 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
 
 static void drive_stat_acct(struct request *rq, int new_io)
 {
+	struct hd_struct *part;
 	int rw = rq_data_dir(rq);
 
 	if (!blk_fs_request(rq) || !rq->rq_disk)
 		return;
 
-	if (!new_io) {
-		__all_stat_inc(rq->rq_disk, merges[rw], rq->sector);
-	} else {
-		struct hd_struct *part = get_part(rq->rq_disk, rq->sector);
+	part = get_part(rq->rq_disk, rq->sector);
+	if (!new_io)
+		__all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector);
+	else {
 		disk_round_stats(rq->rq_disk);
 		rq->rq_disk->in_flight++;
 		if (part) {
@@ -1538,10 +1539,11 @@ static int __end_that_request_first(struct request *req, int error,
 	}
 
 	if (blk_fs_request(req) && req->rq_disk) {
+		struct hd_struct *part = get_part(req->rq_disk, req->sector);
 		const int rw = rq_data_dir(req);
 
-		all_stat_add(req->rq_disk, sectors[rw],
-			     nr_bytes >> 9, req->sector);
+		all_stat_add(req->rq_disk, part, sectors[rw],
+				nr_bytes >> 9, req->sector);
 	}
 
 	total_bytes = bio_nbytes = 0;
@@ -1727,8 +1729,8 @@ static void end_that_request_last(struct request *req, int error)
 		const int rw = rq_data_dir(req);
 		struct hd_struct *part = get_part(disk, req->sector);
 
-		__all_stat_inc(disk, ios[rw], req->sector);
-		__all_stat_add(disk, ticks[rw], duration, req->sector);
+		__all_stat_inc(disk, part, ios[rw], req->sector);
+		__all_stat_add(disk, part, ticks[rw], duration, req->sector);
 		disk_round_stats(disk);
 		disk->in_flight--;
 		if (part) {
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 8fc429cf82b6..41f818be2f7e 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -755,11 +755,13 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector
 {
 	unsigned long n_sect = bio->bi_size >> 9;
 	const int rw = bio_data_dir(bio);
+	struct hd_struct *part;
 
-	all_stat_inc(disk, ios[rw], sector);
-	all_stat_add(disk, ticks[rw], duration, sector);
-	all_stat_add(disk, sectors[rw], n_sect, sector);
-	all_stat_add(disk, io_ticks, duration, sector);
+	part = get_part(disk, sector);
+	all_stat_inc(disk, part, ios[rw], sector);
+	all_stat_add(disk, part, ticks[rw], duration, sector);
+	all_stat_add(disk, part, sectors[rw], n_sect, sector);
+	all_stat_add(disk, part, io_ticks, duration, sector);
 }
 
 void
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index ecd2bf63fc84..e9874e7fcdf9 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -178,17 +178,17 @@ static inline struct hd_struct *get_part(struct gendisk *gendiskp,
 
 static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)	{
 	int i;
+
 	for_each_possible_cpu(i)
 		memset(per_cpu_ptr(gendiskp->dkstats, i), value,
-				sizeof (struct disk_stats));
+				sizeof(struct disk_stats));
 }		
 
 #define __part_stat_add(part, field, addnd)				\
 	(per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd)
 
-#define __all_stat_add(gendiskp, field, addnd, sector)		\
+#define __all_stat_add(gendiskp, part, field, addnd, sector)	\
 ({								\
-	struct hd_struct *part = get_part(gendiskp, sector);	\
 	if (part)						\
 		__part_stat_add(part, field, addnd);		\
 	__disk_stat_add(gendiskp, field, addnd);		\
@@ -203,11 +203,13 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)	{
 	res;								\
 })
 
-static inline void part_stat_set_all(struct hd_struct *part, int value)	{
+static inline void part_stat_set_all(struct hd_struct *part, int value)
+{
 	int i;
+
 	for_each_possible_cpu(i)
 		memset(per_cpu_ptr(part->dkstats, i), value,
-		       sizeof(struct disk_stats));
+				sizeof(struct disk_stats));
 }
 				
 #else /* !CONFIG_SMP */
@@ -223,9 +225,8 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)
 #define __part_stat_add(part, field, addnd) \
 	(part->dkstats.field += addnd)
 
-#define __all_stat_add(gendiskp, field, addnd, sector)		\
+#define __all_stat_add(gendiskp, part, field, addnd, sector)	\
 ({								\
-	struct hd_struct *part = get_part(gendiskp, sector);	\
 	if (part)						\
 		part->dkstats.field += addnd;			\
 	__disk_stat_add(gendiskp, field, addnd);		\
@@ -276,10 +277,10 @@ static inline void part_stat_set_all(struct hd_struct *part, int value)
 #define part_stat_sub(gendiskp, field, subnd) \
 		part_stat_add(gendiskp, field, -subnd)
 
-#define all_stat_add(gendiskp, field, addnd, sector)		\
+#define all_stat_add(gendiskp, part, field, addnd, sector)	\
 	do {							\
 		preempt_disable();				\
-		__all_stat_add(gendiskp, field, addnd, sector);	\
+		__all_stat_add(gendiskp, part, field, addnd, sector);	\
 		preempt_enable();				\
 	} while (0)
 
@@ -288,15 +289,15 @@ static inline void part_stat_set_all(struct hd_struct *part, int value)
 #define all_stat_dec(gendiskp, field, sector) \
 		all_stat_add(gendiskp, field, -1, sector)
 
-#define __all_stat_inc(gendiskp, field, sector) \
-		__all_stat_add(gendiskp, field, 1, sector)
-#define all_stat_inc(gendiskp, field, sector) \
-		all_stat_add(gendiskp, field, 1, sector)
+#define __all_stat_inc(gendiskp, part, field, sector) \
+		__all_stat_add(gendiskp, part, field, 1, sector)
+#define all_stat_inc(gendiskp, part, field, sector) \
+		all_stat_add(gendiskp, part, field, 1, sector)
 
-#define __all_stat_sub(gendiskp, field, subnd, sector) \
-		__all_stat_add(gendiskp, field, -subnd, sector)
-#define all_stat_sub(gendiskp, field, subnd, sector) \
-		all_stat_add(gendiskp, field, -subnd, sector)
+#define __all_stat_sub(gendiskp, part, field, subnd, sector) \
+		__all_stat_add(gendiskp, part, field, -subnd, sector)
+#define all_stat_sub(gendiskp, part, field, subnd, sector) \
+		all_stat_add(gendiskp, part, field, -subnd, sector)
 
 /* Inlines to alloc and free disk stats in struct gendisk */
 #ifdef  CONFIG_SMP
-- 
cgit v1.2.3


From ef75d49f116bccbb80bccd423ecf3cb86c4509a5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 8 May 2008 01:15:21 -0700
Subject: netfilter: nf_conntrack_sip: restrict RTP expect flushing on error to
 last request

Some Inovaphone PBXs exhibit very strange behaviour: when dialing for
example "123", the device sends INVITE requests for "1", "12" and
"123" back to back.  The first requests will elicit error responses
from the receiver, causing the SIP helper to flush the RTP
expectations even though we might still see a positive response.

Note the sequence number of the last INVITE request that contained a
media description and only flush the expectations when receiving a
negative response for that sequence number.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nf_conntrack_sip.h |  1 +
 net/netfilter/nf_conntrack_sip.c           | 22 +++++++++++++---------
 2 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index 5da04e586a3f..23aa2ec6b7b7 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -7,6 +7,7 @@
 
 struct nf_ct_sip_master {
 	unsigned int	register_cseq;
+	unsigned int	invite_cseq;
 };
 
 enum sip_expectation_classes {
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 9f4900069561..2f9bbc058b48 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -870,6 +870,7 @@ static int process_sdp(struct sk_buff *skb,
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conn_help *help = nfct_help(ct);
 	unsigned int matchoff, matchlen;
 	unsigned int mediaoff, medialen;
 	unsigned int sdpoff;
@@ -959,6 +960,9 @@ static int process_sdp(struct sk_buff *skb,
 	if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK)
 		ret = nf_nat_sdp_session(skb, dptr, sdpoff, datalen, &rtp_addr);
 
+	if (ret == NF_ACCEPT && i > 0)
+		help->help.ct_sip_info.invite_cseq = cseq;
+
 	return ret;
 }
 static int process_invite_response(struct sk_buff *skb,
@@ -967,14 +971,14 @@ static int process_invite_response(struct sk_buff *skb,
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conn_help *help = nfct_help(ct);
 
 	if ((code >= 100 && code <= 199) ||
 	    (code >= 200 && code <= 299))
 		return process_sdp(skb, dptr, datalen, cseq);
-	else {
+	else if (help->help.ct_sip_info.invite_cseq == cseq)
 		flush_expectations(ct, true);
-		return NF_ACCEPT;
-	}
+	return NF_ACCEPT;
 }
 
 static int process_update_response(struct sk_buff *skb,
@@ -983,14 +987,14 @@ static int process_update_response(struct sk_buff *skb,
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conn_help *help = nfct_help(ct);
 
 	if ((code >= 100 && code <= 199) ||
 	    (code >= 200 && code <= 299))
 		return process_sdp(skb, dptr, datalen, cseq);
-	else {
+	else if (help->help.ct_sip_info.invite_cseq == cseq)
 		flush_expectations(ct, true);
-		return NF_ACCEPT;
-	}
+	return NF_ACCEPT;
 }
 
 static int process_prack_response(struct sk_buff *skb,
@@ -999,14 +1003,14 @@ static int process_prack_response(struct sk_buff *skb,
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conn_help *help = nfct_help(ct);
 
 	if ((code >= 100 && code <= 199) ||
 	    (code >= 200 && code <= 299))
 		return process_sdp(skb, dptr, datalen, cseq);
-	else {
+	else if (help->help.ct_sip_info.invite_cseq == cseq)
 		flush_expectations(ct, true);
-		return NF_ACCEPT;
-	}
+	return NF_ACCEPT;
 }
 
 static int process_bye_request(struct sk_buff *skb,
-- 
cgit v1.2.3


From 8af302e2dc91d4229968b8eedd4b45c0dd9fc717 Mon Sep 17 00:00:00 2001
From: Jochen Friedrich <jochen@scram.de>
Date: Wed, 7 May 2008 04:40:01 +1000
Subject: [POWERPC] Fix of_i2c include for module compilation

Remove #ifdef CONFIG_OF_I2C as this breaks module compilation.
Drivers using this header should depend on OF_I2C anyways, so
there's no need to make this conditional.

Signed-off-by: Jochen Friedrich <jochen@scram.de>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/linux/of_i2c.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_i2c.h b/include/linux/of_i2c.h
index 2e5a96732042..bd2a870ec296 100644
--- a/include/linux/of_i2c.h
+++ b/include/linux/of_i2c.h
@@ -14,11 +14,7 @@
 
 #include <linux/i2c.h>
 
-#ifdef CONFIG_OF_I2C
-
 void of_register_i2c_devices(struct i2c_adapter *adap,
 			     struct device_node *adap_node);
 
-#endif /* CONFIG_OF_I2C */
-
 #endif /* __LINUX_OF_I2C_H */
-- 
cgit v1.2.3


From 6c2545eefffc452e52302c96c955d9aa26353aa9 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 9 May 2008 16:23:17 +1000
Subject: module: put modversions in vermagic

Don't allow a module built without versions altogether to be inserted
into a kernel which expects modversions.

modprobe --force will strip vermagic as well as modversions, so it
won't be affected, but this will make sure that a
non-CONFIG_MODVERSIONS module won't be accidentally inserted into a
CONFIG_MODVERSIONS kernel.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vermagic.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index 4d0909e53595..79b9837d9ca0 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -17,6 +17,11 @@
 #else
 #define MODULE_VERMAGIC_MODULE_UNLOAD ""
 #endif
+#ifdef CONFIG_MODVERSIONS
+#define MODULE_VERMAGIC_MODVERSIONS "modversions "
+#else
+#define MODULE_VERMAGIC_MODVERSIONS ""
+#endif
 #ifndef MODULE_ARCH_VERMAGIC
 #define MODULE_ARCH_VERMAGIC ""
 #endif
@@ -24,5 +29,6 @@
 #define VERMAGIC_STRING 						\
 	UTS_RELEASE " "							\
 	MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT 			\
-	MODULE_VERMAGIC_MODULE_UNLOAD MODULE_ARCH_VERMAGIC
+	MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS	\
+	MODULE_ARCH_VERMAGIC
 
-- 
cgit v1.2.3


From 005b1f7495e812b99b73de5adbc73afd7a1cbcaf Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Fri, 9 May 2008 15:00:55 -0400
Subject: [libata] revert new check-ready Status register logic

This behavior differs across multiple controllers, so we cannot use
common logic for all controllers.

Revert back to the basic common behavior, and specific drivers will
be updated from here to take into account the unusual Status return
values.

Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/libata.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 7e206da1fbfb..0f17643e0a6e 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1384,17 +1384,14 @@ static inline struct ata_port *ata_shost_to_port(struct Scsi_Host *host)
 
 static inline int ata_check_ready(u8 status)
 {
-	/* Some controllers report 0x77 or 0x7f during intermediate
-	 * not-ready stages.
-	 */
-	if (status == 0x77 || status == 0x7f)
-		return 0;
+	if (!(status & ATA_BUSY))
+		return 1;
 
 	/* 0xff indicates either no device or device not ready */
 	if (status == 0xff)
 		return -ENODEV;
 
-	return !(status & ATA_BUSY);
+	return 0;
 }
 
 
-- 
cgit v1.2.3


From 9c3cdc1f83a6e07092392ff4aba6466517dbd1d0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 10 May 2008 19:51:16 -0700
Subject: Move ACCESS_ONCE() to <linux/compiler.h>

It actually makes much more sense there, and we do tend to need it for
non-RCU usage too.  Moving it to <linux/compiler.h> will allow some
other cases that have open-coded the same logic to use the same helper
function that RCU has used.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 12 ++++++++++++
 include/linux/rcupdate.h | 12 ------------
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index dcae0c8d97e6..c8bd2daf95ec 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -182,4 +182,16 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __section(S) __attribute__ ((__section__(#S)))
 #endif
 
+/*
+ * Prevent the compiler from merging or refetching accesses.  The compiler
+ * is also forbidden from reordering successive instances of ACCESS_ONCE(),
+ * but only when the compiler is aware of some particular ordering.  One way
+ * to make the compiler aware of ordering is to put the two invocations of
+ * ACCESS_ONCE() in different C statements.
+ *
+ * This macro does absolutely -nothing- to prevent the CPU from reordering,
+ * merging, or refetching absolutely anything at any time.
+ */
+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+
 #endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8082d6587a0f..d42dbec06083 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -131,18 +131,6 @@ struct rcu_head {
  */
 #define rcu_read_unlock_bh() __rcu_read_unlock_bh()
 
-/*
- * Prevent the compiler from merging or refetching accesses.  The compiler
- * is also forbidden from reordering successive instances of ACCESS_ONCE(),
- * but only when the compiler is aware of some particular ordering.  One way
- * to make the compiler aware of ordering is to put the two invocations of
- * ACCESS_ONCE() in different C statements.
- *
- * This macro does absolutely -nothing- to prevent the CPU from reordering,
- * merging, or refetching absolutely anything at any time.
- */
-#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
-
 /**
  * rcu_dereference - fetch an RCU-protected pointer in an
  * RCU read-side critical section.  This pointer may later
-- 
cgit v1.2.3


From 8e3e076c5a78519a9f64cd384e8f18bc21882ce0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 10 May 2008 20:58:02 -0700
Subject: BKL: revert back to the old spinlock implementation

The generic semaphore rewrite had a huge performance regression on AIM7
(and potentially other BKL-heavy benchmarks) because the generic
semaphores had been rewritten to be simple to understand and fair.  The
latter, in particular, turns a semaphore-based BKL implementation into a
mess of scheduling.

The attempt to fix the performance regression failed miserably (see the
previous commit 00b41ec2611dc98f87f30753ee00a53db648d662 'Revert
"semaphore: fix"'), and so for now the simple and sane approach is to
instead just go back to the old spinlock-based BKL implementation that
never had any issues like this.

This patch also has the advantage of being reported to fix the
regression completely according to Yanmin Zhang, unlike the semaphore
hack which still left a couple percentage point regression.

As a spinlock, the BKL obviously has the potential to be a latency
issue, but it's not really any different from any other spinlock in that
respect.  We do want to get rid of the BKL asap, but that has been the
plan for several years.

These days, the biggest users are in the tty layer (open/release in
particular) and Alan holds out some hope:

  "tty release is probably a few months away from getting cured - I'm
   afraid it will almost certainly be the very last user of the BKL in
   tty to get fixed as it depends on everything else being sanely locked."

so while we're not there yet, we do have a plan of action.

Tested-by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Alexander Viro <viro@ftp.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mn10300/Kconfig    |  11 -----
 include/linux/hardirq.h |  18 ++++----
 kernel/sched.c          |  27 ++---------
 lib/kernel_lock.c       | 120 ++++++++++++++++++++++++++++++++----------------
 4 files changed, 95 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 6a6409adc564..e856218da90d 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -186,17 +186,6 @@ config PREEMPT
 	  Say Y here if you are building a kernel for a desktop, embedded
 	  or real-time system.  Say N if you are unsure.
 
-config PREEMPT_BKL
-	bool "Preempt The Big Kernel Lock"
-	depends on PREEMPT
-	default y
-	help
-	  This option reduces the latency of the kernel by making the
-	  big kernel lock preemptible.
-
-	  Say Y here if you are building a kernel for a desktop system.
-	  Say N if you are unsure.
-
 config MN10300_CURRENT_IN_E2
 	bool "Hold current task address in E2 register"
 	default y
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 897f723bd222..181006cc94a0 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -72,6 +72,14 @@
 #define in_softirq()		(softirq_count())
 #define in_interrupt()		(irq_count())
 
+#if defined(CONFIG_PREEMPT)
+# define PREEMPT_INATOMIC_BASE kernel_locked()
+# define PREEMPT_CHECK_OFFSET 1
+#else
+# define PREEMPT_INATOMIC_BASE 0
+# define PREEMPT_CHECK_OFFSET 0
+#endif
+
 /*
  * Are we running in atomic context?  WARNING: this macro cannot
  * always detect atomic context; in particular, it cannot know about
@@ -79,17 +87,11 @@
  * used in the general case to determine whether sleeping is possible.
  * Do not use in_atomic() in driver code.
  */
-#define in_atomic()		((preempt_count() & ~PREEMPT_ACTIVE) != 0)
-
-#ifdef CONFIG_PREEMPT
-# define PREEMPT_CHECK_OFFSET 1
-#else
-# define PREEMPT_CHECK_OFFSET 0
-#endif
+#define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_INATOMIC_BASE)
 
 /*
  * Check whether we were atomic before we did preempt_disable():
- * (used by the scheduler)
+ * (used by the scheduler, *after* releasing the kernel lock)
  */
 #define in_atomic_preempt_off() \
 		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
diff --git a/kernel/sched.c b/kernel/sched.c
index 58fb8af15776..c51b6565e07c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4567,8 +4567,6 @@ EXPORT_SYMBOL(schedule);
 asmlinkage void __sched preempt_schedule(void)
 {
 	struct thread_info *ti = current_thread_info();
-	struct task_struct *task = current;
-	int saved_lock_depth;
 
 	/*
 	 * If there is a non-zero preempt_count or interrupts are disabled,
@@ -4579,16 +4577,7 @@ asmlinkage void __sched preempt_schedule(void)
 
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
-
-		/*
-		 * We keep the big kernel semaphore locked, but we
-		 * clear ->lock_depth so that schedule() doesnt
-		 * auto-release the semaphore:
-		 */
-		saved_lock_depth = task->lock_depth;
-		task->lock_depth = -1;
 		schedule();
-		task->lock_depth = saved_lock_depth;
 		sub_preempt_count(PREEMPT_ACTIVE);
 
 		/*
@@ -4609,26 +4598,15 @@ EXPORT_SYMBOL(preempt_schedule);
 asmlinkage void __sched preempt_schedule_irq(void)
 {
 	struct thread_info *ti = current_thread_info();
-	struct task_struct *task = current;
-	int saved_lock_depth;
 
 	/* Catch callers which need to be fixed */
 	BUG_ON(ti->preempt_count || !irqs_disabled());
 
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
-
-		/*
-		 * We keep the big kernel semaphore locked, but we
-		 * clear ->lock_depth so that schedule() doesnt
-		 * auto-release the semaphore:
-		 */
-		saved_lock_depth = task->lock_depth;
-		task->lock_depth = -1;
 		local_irq_enable();
 		schedule();
 		local_irq_disable();
-		task->lock_depth = saved_lock_depth;
 		sub_preempt_count(PREEMPT_ACTIVE);
 
 		/*
@@ -5853,8 +5831,11 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
+#if defined(CONFIG_PREEMPT)
+	task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
+#else
 	task_thread_info(idle)->preempt_count = 0;
-
+#endif
 	/*
 	 * The idle tasks have their own, simple scheduling class:
 	 */
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index cd3e82530b03..01a3c22c1b5a 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -11,79 +11,121 @@
 #include <linux/semaphore.h>
 
 /*
- * The 'big kernel semaphore'
+ * The 'big kernel lock'
  *
- * This mutex is taken and released recursively by lock_kernel()
+ * This spinlock is taken and released recursively by lock_kernel()
  * and unlock_kernel().  It is transparently dropped and reacquired
  * over schedule().  It is used to protect legacy code that hasn't
  * been migrated to a proper locking design yet.
  *
- * Note: code locked by this semaphore will only be serialized against
- * other code using the same locking facility. The code guarantees that
- * the task remains on the same CPU.
- *
  * Don't use in new code.
  */
-static DECLARE_MUTEX(kernel_sem);
+static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag);
+
 
 /*
- * Re-acquire the kernel semaphore.
+ * Acquire/release the underlying lock from the scheduler.
  *
- * This function is called with preemption off.
+ * This is called with preemption disabled, and should
+ * return an error value if it cannot get the lock and
+ * TIF_NEED_RESCHED gets set.
  *
- * We are executing in schedule() so the code must be extremely careful
- * about recursion, both due to the down() and due to the enabling of
- * preemption. schedule() will re-check the preemption flag after
- * reacquiring the semaphore.
+ * If it successfully gets the lock, it should increment
+ * the preemption count like any spinlock does.
+ *
+ * (This works on UP too - _raw_spin_trylock will never
+ * return false in that case)
  */
 int __lockfunc __reacquire_kernel_lock(void)
 {
-	struct task_struct *task = current;
-	int saved_lock_depth = task->lock_depth;
-
-	BUG_ON(saved_lock_depth < 0);
-
-	task->lock_depth = -1;
-	preempt_enable_no_resched();
-
-	down(&kernel_sem);
-
+	while (!_raw_spin_trylock(&kernel_flag)) {
+		if (test_thread_flag(TIF_NEED_RESCHED))
+			return -EAGAIN;
+		cpu_relax();
+	}
 	preempt_disable();
-	task->lock_depth = saved_lock_depth;
-
 	return 0;
 }
 
 void __lockfunc __release_kernel_lock(void)
 {
-	up(&kernel_sem);
+	_raw_spin_unlock(&kernel_flag);
+	preempt_enable_no_resched();
 }
 
 /*
- * Getting the big kernel semaphore.
+ * These are the BKL spinlocks - we try to be polite about preemption.
+ * If SMP is not on (ie UP preemption), this all goes away because the
+ * _raw_spin_trylock() will always succeed.
  */
-void __lockfunc lock_kernel(void)
+#ifdef CONFIG_PREEMPT
+static inline void __lock_kernel(void)
 {
-	struct task_struct *task = current;
-	int depth = task->lock_depth + 1;
+	preempt_disable();
+	if (unlikely(!_raw_spin_trylock(&kernel_flag))) {
+		/*
+		 * If preemption was disabled even before this
+		 * was called, there's nothing we can be polite
+		 * about - just spin.
+		 */
+		if (preempt_count() > 1) {
+			_raw_spin_lock(&kernel_flag);
+			return;
+		}
 
-	if (likely(!depth))
 		/*
-		 * No recursion worries - we set up lock_depth _after_
+		 * Otherwise, let's wait for the kernel lock
+		 * with preemption enabled..
 		 */
-		down(&kernel_sem);
+		do {
+			preempt_enable();
+			while (spin_is_locked(&kernel_flag))
+				cpu_relax();
+			preempt_disable();
+		} while (!_raw_spin_trylock(&kernel_flag));
+	}
+}
 
-	task->lock_depth = depth;
+#else
+
+/*
+ * Non-preemption case - just get the spinlock
+ */
+static inline void __lock_kernel(void)
+{
+	_raw_spin_lock(&kernel_flag);
 }
+#endif
 
-void __lockfunc unlock_kernel(void)
+static inline void __unlock_kernel(void)
 {
-	struct task_struct *task = current;
+	/*
+	 * the BKL is not covered by lockdep, so we open-code the
+	 * unlocking sequence (and thus avoid the dep-chain ops):
+	 */
+	_raw_spin_unlock(&kernel_flag);
+	preempt_enable();
+}
 
-	BUG_ON(task->lock_depth < 0);
+/*
+ * Getting the big kernel lock.
+ *
+ * This cannot happen asynchronously, so we only need to
+ * worry about other CPU's.
+ */
+void __lockfunc lock_kernel(void)
+{
+	int depth = current->lock_depth+1;
+	if (likely(!depth))
+		__lock_kernel();
+	current->lock_depth = depth;
+}
 
-	if (likely(--task->lock_depth < 0))
-		up(&kernel_sem);
+void __lockfunc unlock_kernel(void)
+{
+	BUG_ON(current->lock_depth < 0);
+	if (likely(--current->lock_depth < 0))
+		__unlock_kernel();
 }
 
 EXPORT_SYMBOL(lock_kernel);
-- 
cgit v1.2.3


From 60b129d7bfa3e20450816983bd52c49bb0bc1c21 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sun, 11 May 2008 20:37:06 +0200
Subject: i2c: Match dummy devices by type

As the old driver_name/type matching scheme is going away soon, change
the dummy device mechanism to use the new matching scheme.

This has the downside that dummy i2c clients can no longer choose
their name, they'll all appear as "dummy" in sysfs and in log
messages. I don't think it is a problem in practice though, as there
is little reason to use these i2c clients to log messages.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
---
 drivers/i2c/i2c-core.c    | 14 ++++++++------
 drivers/rtc/rtc-s35390a.c |  2 +-
 include/linux/i2c.h       |  2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 26384daccb96..c99ebeadb558 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -327,6 +327,11 @@ void i2c_unregister_device(struct i2c_client *client)
 EXPORT_SYMBOL_GPL(i2c_unregister_device);
 
 
+static const struct i2c_device_id dummy_id[] = {
+	{ "dummy", 0 },
+	{ },
+};
+
 static int dummy_probe(struct i2c_client *client,
 		       const struct i2c_device_id *id)
 {
@@ -342,13 +347,13 @@ static struct i2c_driver dummy_driver = {
 	.driver.name	= "dummy",
 	.probe		= dummy_probe,
 	.remove		= dummy_remove,
+	.id_table	= dummy_id,
 };
 
 /**
  * i2c_new_dummy - return a new i2c device bound to a dummy driver
  * @adapter: the adapter managing the device
  * @address: seven bit address to be used
- * @type: optional label used for i2c_client.name
  * Context: can sleep
  *
  * This returns an I2C client bound to the "dummy" driver, intended for use
@@ -364,15 +369,12 @@ static struct i2c_driver dummy_driver = {
  * i2c_unregister_device(); or NULL to indicate an error.
  */
 struct i2c_client *
-i2c_new_dummy(struct i2c_adapter *adapter, u16 address, const char *type)
+i2c_new_dummy(struct i2c_adapter *adapter, u16 address)
 {
 	struct i2c_board_info info = {
-		.driver_name	= "dummy",
-		.addr		= address,
+		I2C_BOARD_INFO("dummy", address),
 	};
 
-	if (type)
-		strlcpy(info.type, type, sizeof info.type);
 	return i2c_new_device(adapter, &info);
 }
 EXPORT_SYMBOL_GPL(i2c_new_dummy);
diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index 29f47bacfc77..a6fa1f2f2ca6 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -227,7 +227,7 @@ static int s35390a_probe(struct i2c_client *client,
 	/* This chip uses multiple addresses, use dummy devices for them */
 	for (i = 1; i < 8; ++i) {
 		s35390a->client[i] = i2c_new_dummy(client->adapter,
-					client->addr + i, "rtc-s35390a");
+					client->addr + i);
 		if (!s35390a->client[i]) {
 			dev_err(&client->dev, "Address %02x unavailable\n",
 						client->addr + i);
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index cb63da5c2139..6716ec808c5e 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -262,7 +262,7 @@ i2c_new_probed_device(struct i2c_adapter *adap,
  * client handles for the extra addresses.
  */
 extern struct i2c_client *
-i2c_new_dummy(struct i2c_adapter *adap, u16 address, const char *type);
+i2c_new_dummy(struct i2c_adapter *adap, u16 address);
 
 extern void i2c_unregister_device(struct i2c_client *);
 
-- 
cgit v1.2.3


From c3921ab71507b108d51a0f1ee960f80cd668a93d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 11 May 2008 16:04:48 -0700
Subject: Add new 'cond_resched_bkl()' helper function

It acts exactly like a regular 'cond_resched()', but will not get
optimized away when CONFIG_PREEMPT is set.

Normal kernel code is already preemptable in the presence of
CONFIG_PREEMPT, so cond_resched() is optimized away (see commit
02b67cc3ba36bdba351d6c3a00593f4ec550d9d3 "sched: do not do
cond_resched() when CONFIG_PREEMPT").

But when wanting to conditionally reschedule while holding a lock, you
need to use "cond_resched_lock(lock)", and the new function is the BKL
equivalent of that.

Also make fs/locks.c use it.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/locks.c            | 2 +-
 include/linux/sched.h | 6 +++++-
 kernel/sched.c        | 2 --
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/locks.c b/fs/locks.c
index 0ac6b92cb0b6..11dbf08651b7 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -773,7 +773,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 	 * give it the opportunity to lock the file.
 	 */
 	if (found)
-		cond_resched();
+		cond_resched_bkl();
 
 find_conflict:
 	for_each_lock(inode, before) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0c35b0343a76..4ab9f32f9238 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2037,13 +2037,13 @@ static inline int need_resched(void)
  * cond_resched_lock() will drop the spinlock before scheduling,
  * cond_resched_softirq() will enable bhs before scheduling.
  */
+extern int _cond_resched(void);
 #ifdef CONFIG_PREEMPT
 static inline int cond_resched(void)
 {
 	return 0;
 }
 #else
-extern int _cond_resched(void);
 static inline int cond_resched(void)
 {
 	return _cond_resched();
@@ -2051,6 +2051,10 @@ static inline int cond_resched(void)
 #endif
 extern int cond_resched_lock(spinlock_t * lock);
 extern int cond_resched_softirq(void);
+static inline int cond_resched_bkl(void)
+{
+	return _cond_resched();
+}
 
 /*
  * Does a critical section need to be broken due to another
diff --git a/kernel/sched.c b/kernel/sched.c
index c51b6565e07c..8841a915545d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5525,7 +5525,6 @@ static void __cond_resched(void)
 	} while (need_resched());
 }
 
-#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
 int __sched _cond_resched(void)
 {
 	if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
@@ -5536,7 +5535,6 @@ int __sched _cond_resched(void)
 	return 0;
 }
 EXPORT_SYMBOL(_cond_resched);
-#endif
 
 /*
  * cond_resched_lock() - if a reschedule is pending, drop the given lock,
-- 
cgit v1.2.3


From 4951704b4e23d71b99ac933d8e6993bc6225ac13 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 12 May 2008 03:29:11 -0700
Subject: syncppp: Fix crashes.

The syncppp layer wants a mid-level netdev private pointer.

It was using netdev->priv but that only worked by accident,
and thus this scheme was broken when the device private
allocation strategy changed.

Add a proper mid-layer private pointer for uses like this,
update syncppp and all users, and remove the HDLC_PPP broken
tag from drivers/net/wan/Kconfig

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/Kconfig        |  4 +---
 drivers/net/wan/cosa.c         | 14 +++++++-------
 drivers/net/wan/hdlc_ppp.c     |  2 +-
 drivers/net/wan/hostess_sv11.c | 12 ++++++------
 drivers/net/wan/lmc/lmc_main.c |  1 +
 drivers/net/wan/sealevel.c     |  1 +
 include/linux/netdevice.h      |  3 +++
 include/net/syncppp.h          |  2 +-
 8 files changed, 21 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig
index 8005dd16fb4e..d5140aed7b79 100644
--- a/drivers/net/wan/Kconfig
+++ b/drivers/net/wan/Kconfig
@@ -150,11 +150,9 @@ config HDLC_FR
 
 config HDLC_PPP
 	tristate "Synchronous Point-to-Point Protocol (PPP) support"
-	depends on HDLC && BROKEN
+	depends on HDLC
 	help
 	  Generic HDLC driver supporting PPP over WAN connections.
-	  This module is currently broken and will cause a kernel panic
-	  when a device configured in PPP mode is activated.
 
 	  It will be replaced by new PPP implementation in Linux 2.6.26.
 
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 45ddfc9763cc..b0fce1387eaf 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -629,7 +629,7 @@ static void sppp_channel_init(struct channel_data *chan)
 	d->base_addr = chan->cosa->datareg;
 	d->irq = chan->cosa->irq;
 	d->dma = chan->cosa->dma;
-	d->priv = chan;
+	d->ml_priv = chan;
 	sppp_attach(&chan->pppdev);
 	if (register_netdev(d)) {
 		printk(KERN_WARNING "%s: register_netdev failed.\n", d->name);
@@ -650,7 +650,7 @@ static void sppp_channel_delete(struct channel_data *chan)
 
 static int cosa_sppp_open(struct net_device *d)
 {
-	struct channel_data *chan = d->priv;
+	struct channel_data *chan = d->ml_priv;
 	int err;
 	unsigned long flags;
 
@@ -690,7 +690,7 @@ static int cosa_sppp_open(struct net_device *d)
 
 static int cosa_sppp_tx(struct sk_buff *skb, struct net_device *dev)
 {
-	struct channel_data *chan = dev->priv;
+	struct channel_data *chan = dev->ml_priv;
 
 	netif_stop_queue(dev);
 
@@ -701,7 +701,7 @@ static int cosa_sppp_tx(struct sk_buff *skb, struct net_device *dev)
 
 static void cosa_sppp_timeout(struct net_device *dev)
 {
-	struct channel_data *chan = dev->priv;
+	struct channel_data *chan = dev->ml_priv;
 
 	if (test_bit(RXBIT, &chan->cosa->rxtx)) {
 		chan->stats.rx_errors++;
@@ -720,7 +720,7 @@ static void cosa_sppp_timeout(struct net_device *dev)
 
 static int cosa_sppp_close(struct net_device *d)
 {
-	struct channel_data *chan = d->priv;
+	struct channel_data *chan = d->ml_priv;
 	unsigned long flags;
 
 	netif_stop_queue(d);
@@ -800,7 +800,7 @@ static int sppp_tx_done(struct channel_data *chan, int size)
 
 static struct net_device_stats *cosa_net_stats(struct net_device *dev)
 {
-	struct channel_data *chan = dev->priv;
+	struct channel_data *chan = dev->ml_priv;
 	return &chan->stats;
 }
 
@@ -1217,7 +1217,7 @@ static int cosa_sppp_ioctl(struct net_device *dev, struct ifreq *ifr,
 	int cmd)
 {
 	int rv;
-	struct channel_data *chan = dev->priv;
+	struct channel_data *chan = dev->ml_priv;
 	rv = cosa_ioctl_common(chan->cosa, chan, cmd, (unsigned long)ifr->ifr_data);
 	if (rv == -ENOIOCTLCMD) {
 		return sppp_do_ioctl(dev, ifr, cmd);
diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index 10396d9686f4..00308337928e 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -45,7 +45,7 @@ static int ppp_open(struct net_device *dev)
 	int (*old_ioctl)(struct net_device *, struct ifreq *, int);
 	int result;
 
-	dev->priv = &state(hdlc)->syncppp_ptr;
+	dev->ml_priv = &state(hdlc)->syncppp_ptr;
 	state(hdlc)->syncppp_ptr = &state(hdlc)->pppdev;
 	state(hdlc)->pppdev.dev = dev;
 
diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index 83dbc924fcb5..f3065d3473fd 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -75,7 +75,7 @@ static void hostess_input(struct z8530_channel *c, struct sk_buff *skb)
  
 static int hostess_open(struct net_device *d)
 {
-	struct sv11_device *sv11=d->priv;
+	struct sv11_device *sv11=d->ml_priv;
 	int err = -1;
 	
 	/*
@@ -128,7 +128,7 @@ static int hostess_open(struct net_device *d)
 
 static int hostess_close(struct net_device *d)
 {
-	struct sv11_device *sv11=d->priv;
+	struct sv11_device *sv11=d->ml_priv;
 	/*
 	 *	Discard new frames
 	 */
@@ -159,14 +159,14 @@ static int hostess_close(struct net_device *d)
 
 static int hostess_ioctl(struct net_device *d, struct ifreq *ifr, int cmd)
 {
-	/* struct sv11_device *sv11=d->priv;
+	/* struct sv11_device *sv11=d->ml_priv;
 	   z8530_ioctl(d,&sv11->sync.chanA,ifr,cmd) */
 	return sppp_do_ioctl(d, ifr,cmd);
 }
 
 static struct net_device_stats *hostess_get_stats(struct net_device *d)
 {
-	struct sv11_device *sv11=d->priv;
+	struct sv11_device *sv11=d->ml_priv;
 	if(sv11)
 		return z8530_get_stats(&sv11->sync.chanA);
 	else
@@ -179,7 +179,7 @@ static struct net_device_stats *hostess_get_stats(struct net_device *d)
  
 static int hostess_queue_xmit(struct sk_buff *skb, struct net_device *d)
 {
-	struct sv11_device *sv11=d->priv;
+	struct sv11_device *sv11=d->ml_priv;
 	return z8530_queue_xmit(&sv11->sync.chanA, skb);
 }
 
@@ -325,6 +325,7 @@ static struct sv11_device *sv11_init(int iobase, int irq)
 		/* 
 		 *	Initialise the PPP components
 		 */
+		d->ml_priv = sv;
 		sppp_attach(&sv->netdev);
 		
 		/*
@@ -333,7 +334,6 @@ static struct sv11_device *sv11_init(int iobase, int irq)
 		
 		d->base_addr = iobase;
 		d->irq = irq;
-		d->priv = sv;
 		
 		if(register_netdev(d))
 		{
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 6635ecef36e5..62133cee446a 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -891,6 +891,7 @@ static int __devinit lmc_init_one(struct pci_dev *pdev,
 
     /* Initialize the sppp layer */
     /* An ioctl can cause a subsequent detach for raw frame interface */
+    dev->ml_priv = sc;
     sc->if_type = LMC_PPP;
     sc->check = 0xBEAFCAFE;
     dev->base_addr = pci_resource_start(pdev, 0);
diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 11276bf3149f..44a89df1b8bf 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -241,6 +241,7 @@ static inline struct slvl_device *slvl_alloc(int iobase, int irq)
 		return NULL;
 
 	sv = d->priv;
+	d->ml_priv = sv;
 	sv->if_ptr = &sv->pppdev;
 	sv->pppdev.dev = d;
 	d->base_addr = iobase;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7c1d4466583b..746901774d49 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -715,6 +715,9 @@ struct net_device
 	struct net		*nd_net;
 #endif
 
+	/* mid-layer private */
+	void			*ml_priv;
+
 	/* bridge stuff */
 	struct net_bridge_port	*br_port;
 	/* macvlan */
diff --git a/include/net/syncppp.h b/include/net/syncppp.h
index 877efa434700..e43f4070d892 100644
--- a/include/net/syncppp.h
+++ b/include/net/syncppp.h
@@ -59,7 +59,7 @@ struct ppp_device
 
 static inline struct sppp *sppp_of(struct net_device *dev) 
 {
-	struct ppp_device **ppp = dev->priv;
+	struct ppp_device **ppp = dev->ml_priv;
 	BUG_ON((*ppp)->dev != dev);
 	return &(*ppp)->sppp;
 }
-- 
cgit v1.2.3


From 9404ef02974a5411687b6c1b8ef3984305620e02 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 12 May 2008 10:14:22 -0700
Subject: Fix up 'need_resched()' definition

We should not go through the task pointer to get at the thread info,
since it's usually cheaper to just access the thread info directly.

So don't make the code look up 'current', when we can just use the
thread info accessor functions directly.  This generally avoids one
level of indirection and tends to work better together with code that
also looks at other thread flags (eg preempt_count).

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4ab9f32f9238..5a63f2d72af6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2027,7 +2027,7 @@ static inline int fatal_signal_pending(struct task_struct *p)
 
 static inline int need_resched(void)
 {
-	return unlikely(test_tsk_need_resched(current));
+	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
 }
 
 /*
-- 
cgit v1.2.3


From c714a534d85576af21b06be605ca55cb2fb887ee Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 12 May 2008 13:34:13 -0700
Subject: Make 'cond_resched()' nullification depend on PREEMPT_BKL

Because it's not correct with a non-preemptable BKL and just causes
PREEMPT kernels to have longer latencies than non-PREEMPT ones (which is
obviously not the point of it at all).

Of course, that config option actually got removed as an option earlier,
so for now this basically disables it entirely, but if BKL preemption is
ever resurrected it will be a meaningful optimization.  And in the
meantime, it at least documents the intent of the code, while not doing
the wrong thing.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5a63f2d72af6..5395a6176f4b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2038,7 +2038,7 @@ static inline int need_resched(void)
  * cond_resched_softirq() will enable bhs before scheduling.
  */
 extern int _cond_resched(void);
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPT_BKL
 static inline int cond_resched(void)
 {
 	return 0;
-- 
cgit v1.2.3


From 8388e3da34edb141362bb42811ee487dfec15525 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 12 May 2008 20:17:33 -0700
Subject: net: Set LL_MAX_HEADER properly for wireless.

Wireless networking, particularly with MESH enabled, has
quite strong requirements for link-layer header space.

Based upon some numbers and descriptions from Johannes Berg
we use 96 (same as AX25) for plain wireless, and with
mesh enabled we use 128.

In the process, simplify the cpp conditional logic here by
ordering the cases by those needing the most space down
to those needing the least space.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 746901774d49..a3fb57fde623 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -93,14 +93,16 @@ struct wireless_dev;
  *	used.
  */
  
-#if !defined(CONFIG_AX25) && !defined(CONFIG_AX25_MODULE) && !defined(CONFIG_TR)
-#define LL_MAX_HEADER	32
+#if defined(CONFIG_WLAN_80211) || defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+# if defined(CONFIG_MAC80211_MESH)
+#  define LL_MAX_HEADER 128
+# else
+#  define LL_MAX_HEADER 96
+# endif
+#elif defined(CONFIG_TR)
+# define LL_MAX_HEADER 48
 #else
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
-#define LL_MAX_HEADER	96
-#else
-#define LL_MAX_HEADER	48
-#endif
+# define LL_MAX_HEADER 32
 #endif
 
 #if !defined(CONFIG_NET_IPIP) && !defined(CONFIG_NET_IPIP_MODULE) && \
-- 
cgit v1.2.3


From f5184d267c1aedb9b7a8cc44e08ff6b8d382c3b5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 12 May 2008 20:48:31 -0700
Subject: net: Allow netdevices to specify needed head/tailroom

This patch adds needed_headroom/needed_tailroom members to struct
net_device and updates many places that allocate skbs to use them. Not
all of them can be converted though, and I'm sure I missed some (I
mostly grepped for LL_RESERVED_SPACE)

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 16 ++++++++++++++--
 net/core/netpoll.c        |  2 +-
 net/econet/af_econet.c    |  2 +-
 net/ipv4/arp.c            |  2 +-
 net/ipv4/igmp.c           |  4 ++--
 net/ipv4/ipconfig.c       |  6 +++---
 net/ipv4/raw.c            | 10 ++++------
 net/ipv6/ip6_output.c     |  2 +-
 net/ipv6/mcast.c          |  4 ++--
 net/ipv6/ndisc.c          |  4 ++--
 net/ipv6/raw.c            | 10 ++++------
 net/packet/af_packet.c    |  2 +-
 net/xfrm/xfrm_output.c    |  6 +++---
 13 files changed, 39 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a3fb57fde623..b11e6e19e96c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -246,11 +246,16 @@ struct hh_cache
  *
  * We could use other alignment values, but we must maintain the
  * relationship HH alignment <= LL alignment.
+ *
+ * LL_ALLOCATED_SPACE also takes into account the tailroom the device
+ * may need.
  */
 #define LL_RESERVED_SPACE(dev) \
-	(((dev)->hard_header_len&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
+	((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
 #define LL_RESERVED_SPACE_EXTRA(dev,extra) \
-	((((dev)->hard_header_len+extra)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
+	((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
+#define LL_ALLOCATED_SPACE(dev) \
+	((((dev)->hard_header_len+(dev)->needed_headroom+(dev)->needed_tailroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
 
 struct header_ops {
 	int	(*create) (struct sk_buff *skb, struct net_device *dev,
@@ -569,6 +574,13 @@ struct net_device
 	unsigned short		type;	/* interface hardware type	*/
 	unsigned short		hard_header_len;	/* hardware hdr length	*/
 
+	/* extra head- and tailroom the hardware may need, but not in all cases
+	 * can this be guaranteed, especially tailroom. Some cases also use
+	 * LL_MAX_HEADER instead to allocate the skb.
+	 */
+	unsigned short		needed_headroom;
+	unsigned short		needed_tailroom;
+
 	struct net_device	*master; /* Pointer to master device of a group,
 					  * which this device is member of.
 					  */
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index b04d643fc3c7..8fb134da0346 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -419,7 +419,7 @@ static void arp_reply(struct sk_buff *skb)
 		return;
 
 	size = arp_hdr_len(skb->dev);
-	send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev),
+	send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev),
 			    LL_RESERVED_SPACE(np->dev));
 
 	if (!send_skb)
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 68d154480043..7c9bb13b1539 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -340,7 +340,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 		dev_hold(dev);
 
-		skb = sock_alloc_send_skb(sk, len+LL_RESERVED_SPACE(dev),
+		skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev),
 					  msg->msg_flags & MSG_DONTWAIT, &err);
 		if (skb==NULL)
 			goto out_unlock;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 68b72a7a1806..418862f1bf22 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -570,7 +570,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
 	 *	Allocate a buffer
 	 */
 
-	skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+	skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
 	if (skb == NULL)
 		return NULL;
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6250f4239b61..2769dc4a4c84 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -292,7 +292,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	struct iphdr *pip;
 	struct igmpv3_report *pig;
 
-	skb = alloc_skb(size + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+	skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
 	if (skb == NULL)
 		return NULL;
 
@@ -653,7 +653,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 		return -1;
 	}
 
-	skb=alloc_skb(IGMP_SIZE+LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+	skb=alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
 	if (skb == NULL) {
 		ip_rt_put(rt);
 		return -1;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 89dee4346f60..ed45037ce9be 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -710,14 +710,14 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
 	struct net_device *dev = d->dev;
 	struct sk_buff *skb;
 	struct bootp_pkt *b;
-	int hh_len = LL_RESERVED_SPACE(dev);
 	struct iphdr *h;
 
 	/* Allocate packet */
-	skb = alloc_skb(sizeof(struct bootp_pkt) + hh_len + 15, GFP_KERNEL);
+	skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15,
+			GFP_KERNEL);
 	if (!skb)
 		return;
-	skb_reserve(skb, hh_len);
+	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 	b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt));
 	memset(b, 0, sizeof(struct bootp_pkt));
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 11d7f753a820..fead049daf43 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -322,7 +322,6 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 			unsigned int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	int hh_len;
 	struct iphdr *iph;
 	struct sk_buff *skb;
 	unsigned int iphlen;
@@ -336,13 +335,12 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 	if (flags&MSG_PROBE)
 		goto out;
 
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
-
-	skb = sock_alloc_send_skb(sk, length+hh_len+15,
-				  flags&MSG_DONTWAIT, &err);
+	skb = sock_alloc_send_skb(sk,
+				  length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
+				  flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto error;
-	skb_reserve(skb, hh_len);
+	skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0af2e055f883..48cdce9c696c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -780,7 +780,7 @@ slow_path:
 		 *	Allocate buffer.
 		 */
 
-		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
+		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
 			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 			IP6_INC_STATS(ip6_dst_idev(skb->dst),
 				      IPSTATS_MIB_FRAGFAILS);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 54f91efdae58..fd632dd7f98d 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1411,7 +1411,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
 		     IPV6_TLV_PADN, 0 };
 
 	/* we assume size > sizeof(ra) here */
-	skb = sock_alloc_send_skb(sk, size + LL_RESERVED_SPACE(dev), 1, &err);
+	skb = sock_alloc_send_skb(sk, size + LL_ALLOCATED_SPACE(dev), 1, &err);
 
 	if (!skb)
 		return NULL;
@@ -1790,7 +1790,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 	payload_len = len + sizeof(ra);
 	full_len = sizeof(struct ipv6hdr) + payload_len;
 
-	skb = sock_alloc_send_skb(sk, LL_RESERVED_SPACE(dev) + full_len, 1, &err);
+	skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + full_len, 1, &err);
 
 	if (skb == NULL) {
 		rcu_read_lock();
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 2c74885f8355..a55fc05b8125 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -479,7 +479,7 @@ static void __ndisc_send(struct net_device *dev,
 
 	skb = sock_alloc_send_skb(sk,
 				  (MAX_HEADER + sizeof(struct ipv6hdr) +
-				   len + LL_RESERVED_SPACE(dev)),
+				   len + LL_ALLOCATED_SPACE(dev)),
 				  1, &err);
 	if (!skb) {
 		ND_PRINTK0(KERN_ERR
@@ -1521,7 +1521,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 
 	buff = sock_alloc_send_skb(sk,
 				   (MAX_HEADER + sizeof(struct ipv6hdr) +
-				    len + LL_RESERVED_SPACE(dev)),
+				    len + LL_ALLOCATED_SPACE(dev)),
 				   1, &err);
 	if (buff == NULL) {
 		ND_PRINTK0(KERN_ERR
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 396f0ea11090..232e0dc45bf5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -609,7 +609,6 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6hdr *iph;
 	struct sk_buff *skb;
-	unsigned int hh_len;
 	int err;
 
 	if (length > rt->u.dst.dev->mtu) {
@@ -619,13 +618,12 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 	if (flags&MSG_PROBE)
 		goto out;
 
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
-
-	skb = sock_alloc_send_skb(sk, length+hh_len+15,
-				  flags&MSG_DONTWAIT, &err);
+	skb = sock_alloc_send_skb(sk,
+				  length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
+				  flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto error;
-	skb_reserve(skb, hh_len);
+	skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 25070240d4ae..2cee87da4441 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -743,7 +743,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (len > dev->mtu+reserve)
 		goto out_unlock;
 
-	skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
+	skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
 				msg->msg_flags & MSG_DONTWAIT, &err);
 	if (skb==NULL)
 		goto out_unlock;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 09cd9c0c2d80..3f964db908a7 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -25,11 +25,11 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
 	struct dst_entry *dst = skb->dst;
 	int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev)
 		- skb_headroom(skb);
+	int ntail = dst->dev->needed_tailroom - skb_tailroom(skb);
 
-	if (nhead > 0)
-		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
+	if (nhead > 0 || ntail > 0)
+		return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC);
 
-	/* Check tail too... */
 	return 0;
 }
 
-- 
cgit v1.2.3


From f4ed0deae8983591264d0e194e168ef65f4775f5 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Mon, 12 May 2008 14:02:29 -0700
Subject: cpumask: remove bitmap_scnprintf_len and cpumask_scnprintf_len

They aren't used.  They were briefly used as part of some other patches to
provide an alternative format for displaying some /proc and /sys cpumasks.
They probably should have been removed when those other patches were dropped,
in favor of a different solution.

Signed-off-by: Paul Jackson <pj@sgi.com>
Cc: "Mike Travis" <travis@sgi.com>
Cc: "Bert Wesarg" <bert.wesarg@googlemail.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h  |  1 -
 include/linux/cpumask.h |  7 -------
 lib/bitmap.c            | 16 ----------------
 3 files changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 43b406def35f..1abfe664c444 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -110,7 +110,6 @@ extern int __bitmap_weight(const unsigned long *bitmap, int bits);
 
 extern int bitmap_scnprintf(char *buf, unsigned int len,
 			const unsigned long *src, int nbits);
-extern int bitmap_scnprintf_len(unsigned int len);
 extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user,
 			unsigned long *dst, int nbits);
 extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 9650806fe2ea..5df3db58fcc6 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -289,13 +289,6 @@ static inline int __cpumask_scnprintf(char *buf, int len,
 	return bitmap_scnprintf(buf, len, srcp->bits, nbits);
 }
 
-#define cpumask_scnprintf_len(len) \
-			__cpumask_scnprintf_len((len))
-static inline int __cpumask_scnprintf_len(int len)
-{
-	return bitmap_scnprintf_len(len);
-}
-
 #define cpumask_parse_user(ubuf, ulen, dst) \
 			__cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS)
 static inline int __cpumask_parse_user(const char __user *buf, int len,
diff --git a/lib/bitmap.c b/lib/bitmap.c
index c4cb48f77f0c..482df94ea21e 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -315,22 +315,6 @@ int bitmap_scnprintf(char *buf, unsigned int buflen,
 }
 EXPORT_SYMBOL(bitmap_scnprintf);
 
-/**
- * bitmap_scnprintf_len - return buffer length needed to convert
- * bitmap to an ASCII hex string.
- * @len: number of bits to be converted
- */
-int bitmap_scnprintf_len(unsigned int len)
-{
-	/* we need 9 chars per word for 32 bit words (8 hexdigits + sep/null) */
-	int bitslen = ALIGN(len, CHUNKSZ);
-	int wordlen = CHUNKSZ / 4;
-	int buflen = (bitslen / wordlen) * (wordlen + 1) * sizeof(char);
-
-	return buflen;
-}
-EXPORT_SYMBOL(bitmap_scnprintf_len);
-
 /**
  * __bitmap_parse - convert an ASCII hex string into a bitmap.
  * @buf: pointer to buffer containing string.
-- 
cgit v1.2.3


From 4cd1a8fc3d3cd740416b14ece2693dbb5d065eaf Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Mon, 12 May 2008 14:02:31 -0700
Subject: memcg: fix possible panic when CONFIG_MM_OWNER=y

When mm destruction happens, we should pass mm_update_next_owner() the old mm.
But unfortunately the new mm is passed in exec_mmap().

Thus, kernel panic is possible when a multi-threaded process uses exec().

Also, the owner member comment description is wrong.  mm->owner does not
necessarily point to the thread group leader.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: "Paul Menage" <menage@google.com>
Cc: "KAMEZAWA Hiroyuki" <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                |  2 +-
 include/linux/mm_types.h | 13 +++++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index aeaa9791d8be..1f8a24aa1f8b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -736,7 +736,7 @@ static int exec_mmap(struct mm_struct *mm)
 	tsk->active_mm = mm;
 	activate_mm(active_mm, mm);
 	task_unlock(tsk);
-	mm_update_next_owner(mm);
+	mm_update_next_owner(old_mm);
 	arch_pick_mmap_layout(mm);
 	if (old_mm) {
 		up_read(&old_mm->mmap_sem);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index eb7c16cc9559..02a27ae78539 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -226,8 +226,17 @@ struct mm_struct {
 	rwlock_t		ioctx_list_lock;	/* aio lock */
 	struct kioctx		*ioctx_list;
 #ifdef CONFIG_MM_OWNER
-	struct task_struct *owner;	/* The thread group leader that */
-					/* owns the mm_struct.		*/
+	/*
+	 * "owner" points to a task that is regarded as the canonical
+	 * user/owner of this mm. All of the following must be true in
+	 * order for it to be changed:
+	 *
+	 * current == mm->owner
+	 * current->mm != mm
+	 * new_owner->mm == mm
+	 * new_owner->alloc_lock is held
+	 */
+	struct task_struct *owner;
 #endif
 
 #ifdef CONFIG_PROC_FS
-- 
cgit v1.2.3


From 78bb6cb9a890d3d50ca3b02fce9223d3e734ab9b Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 12 May 2008 14:02:32 -0700
Subject: fuse: add flag to turn on big writes

Prior to 2.6.26 fuse only supported single page write requests.  In theory all
fuse filesystems should be able to support bigger than 4k writes, as there's
nothing in the API to prevent it.  Unfortunately there's a known case in
NTFS-3G where big writes cause filesystem corruption.  There could also be
other filesystems, where the lack of testing with big write requests would
result in bugs.

To prevent such problems on a kernel upgrade, disable big writes by default,
but let filesystems set a flag to turn it on.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: Szabolcs Szakacsits <szaka@ntfs-3g.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fuse/file.c       | 2 ++
 fs/fuse/fuse_i.h     | 3 +++
 fs/fuse/inode.c      | 5 ++++-
 include/linux/fuse.h | 1 +
 4 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f28cf8b46f80..8092f0d9fd1f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -804,6 +804,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
 		if (offset == PAGE_CACHE_SIZE)
 			offset = 0;
 
+		if (!fc->big_writes)
+			break;
 	} while (iov_iter_count(ii) && count < fc->max_write &&
 		 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index dadffa21a206..bae948657c4f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -404,6 +404,9 @@ struct fuse_conn {
 	/** Is bmap not implemented by fs? */
 	unsigned no_bmap : 1;
 
+	/** Do multi-page cached writes */
+	unsigned big_writes : 1;
+
 	/** The number of requests waiting for completion */
 	atomic_t num_waiting;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 79b615873838..fb77e0962132 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -576,6 +576,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 				fc->no_lock = 1;
 			if (arg->flags & FUSE_ATOMIC_O_TRUNC)
 				fc->atomic_o_trunc = 1;
+			if (arg->flags & FUSE_BIG_WRITES)
+				fc->big_writes = 1;
 		} else {
 			ra_pages = fc->max_read / PAGE_CACHE_SIZE;
 			fc->no_lock = 1;
@@ -599,7 +601,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 	arg->major = FUSE_KERNEL_VERSION;
 	arg->minor = FUSE_KERNEL_MINOR_VERSION;
 	arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
-	arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC;
+	arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
+		FUSE_BIG_WRITES;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 5c86f1196c3a..d48282197696 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -109,6 +109,7 @@ struct fuse_file_lock {
 #define FUSE_POSIX_LOCKS	(1 << 1)
 #define FUSE_FILE_OPS		(1 << 2)
 #define FUSE_ATOMIC_O_TRUNC	(1 << 3)
+#define FUSE_BIG_WRITES		(1 << 5)
 
 /**
  * Release flags
-- 
cgit v1.2.3


From d23039eec77473124c9635c01378314f196f2211 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Fri, 25 Apr 2008 19:23:16 -0700
Subject: USB: add association.h

This will be used by the wireless usb code, as well as potentially other
USB code.

Originally based on some .c code written by Inaky Perez-Gonzalez
<inaky.perez-gonzalez@intel.com>

Cc: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
Cc: David Brownell <david-b@pacbell.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/association.h | 150 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 150 insertions(+)
 create mode 100644 include/linux/usb/association.h

(limited to 'include/linux')

diff --git a/include/linux/usb/association.h b/include/linux/usb/association.h
new file mode 100644
index 000000000000..07c5e3cf5898
--- /dev/null
+++ b/include/linux/usb/association.h
@@ -0,0 +1,150 @@
+/*
+ * Wireless USB - Cable Based Association
+ *
+ * Copyright (C) 2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ */
+#ifndef __LINUX_USB_ASSOCIATION_H
+#define __LINUX_USB_ASSOCIATION_H
+
+
+/*
+ * Association attributes
+ *
+ * Association Models Supplement to WUSB 1.0 T[3-1]
+ *
+ * Each field in the structures has its ID, its length and then the
+ * value. This is the actual definition of the field's ID and its
+ * length.
+ */
+struct wusb_am_attr {
+	__u8 id;
+	__u8 len;
+};
+
+/* Different fields defined by the spec */
+#define WUSB_AR_AssociationTypeId	{ .id = 0x0000, .len =  2 }
+#define WUSB_AR_AssociationSubTypeId	{ .id = 0x0001, .len =  2 }
+#define WUSB_AR_Length			{ .id = 0x0002, .len =  4 }
+#define WUSB_AR_AssociationStatus	{ .id = 0x0004, .len =  4 }
+#define WUSB_AR_LangID			{ .id = 0x0008, .len =  2 }
+#define WUSB_AR_DeviceFriendlyName	{ .id = 0x000b, .len = 64 } /* max */
+#define WUSB_AR_HostFriendlyName	{ .id = 0x000c, .len = 64 } /* max */
+#define WUSB_AR_CHID			{ .id = 0x1000, .len = 16 }
+#define WUSB_AR_CDID			{ .id = 0x1001, .len = 16 }
+#define WUSB_AR_ConnectionContext	{ .id = 0x1002, .len = 48 }
+#define WUSB_AR_BandGroups		{ .id = 0x1004, .len =  2 }
+
+/* CBAF Control Requests (AMS1.0[T4-1]) */
+enum {
+	CBAF_REQ_GET_ASSOCIATION_INFORMATION = 0x01,
+	CBAF_REQ_GET_ASSOCIATION_REQUEST,
+	CBAF_REQ_SET_ASSOCIATION_RESPONSE
+};
+
+/*
+ * CBAF USB-interface definitions
+ *
+ * No altsettings, one optional interrupt endpoint.
+ */
+enum {
+	CBAF_IFACECLASS    = 0xef,
+	CBAF_IFACESUBCLASS = 0x03,
+	CBAF_IFACEPROTOCOL = 0x01,
+};
+
+/* Association Information (AMS1.0[T4-3]) */
+struct wusb_cbaf_assoc_info {
+	__le16 Length;
+	__u8 NumAssociationRequests;
+	__le16 Flags;
+	__u8 AssociationRequestsArray[];
+} __attribute__((packed));
+
+/* Association Request (AMS1.0[T4-4]) */
+struct wusb_cbaf_assoc_request {
+	__u8 AssociationDataIndex;
+	__u8 Reserved;
+	__le16 AssociationTypeId;
+	__le16 AssociationSubTypeId;
+	__le32 AssociationTypeInfoSize;
+} __attribute__((packed));
+
+enum {
+	AR_TYPE_WUSB                    = 0x0001,
+	AR_TYPE_WUSB_RETRIEVE_HOST_INFO = 0x0000,
+	AR_TYPE_WUSB_ASSOCIATE          = 0x0001,
+};
+
+/* Association Attribute header (AMS1.0[3.8]) */
+struct wusb_cbaf_attr_hdr {
+	__le16 id;
+	__le16 len;
+} __attribute__((packed));
+
+/* Host Info (AMS1.0[T4-7]) (yeah, more headers and fields...) */
+struct wusb_cbaf_host_info {
+	struct wusb_cbaf_attr_hdr AssociationTypeId_hdr;
+	__le16 AssociationTypeId;
+	struct wusb_cbaf_attr_hdr AssociationSubTypeId_hdr;
+	__le16 AssociationSubTypeId;
+	struct wusb_cbaf_attr_hdr CHID_hdr;
+	struct wusb_ckhdid CHID;
+	struct wusb_cbaf_attr_hdr LangID_hdr;
+	__le16 LangID;
+	struct wusb_cbaf_attr_hdr HostFriendlyName_hdr;
+	__u8 HostFriendlyName[];
+} __attribute__((packed));
+
+/* Device Info (AMS1.0[T4-8])
+ *
+ * I still don't get this tag'n'header stuff for each goddamn
+ * field...
+ */
+struct wusb_cbaf_device_info {
+	struct wusb_cbaf_attr_hdr Length_hdr;
+	__le32 Length;
+	struct wusb_cbaf_attr_hdr CDID_hdr;
+	struct wusb_ckhdid CDID;
+	struct wusb_cbaf_attr_hdr BandGroups_hdr;
+	__le16 BandGroups;
+	struct wusb_cbaf_attr_hdr LangID_hdr;
+	__le16 LangID;
+	struct wusb_cbaf_attr_hdr DeviceFriendlyName_hdr;
+	__u8 DeviceFriendlyName[];
+} __attribute__((packed));
+
+/* Connection Context; CC_DATA - Success case (AMS1.0[T4-9]) */
+struct wusb_cbaf_cc_data {
+	struct wusb_cbaf_attr_hdr AssociationTypeId_hdr;
+	__le16 AssociationTypeId;
+	struct wusb_cbaf_attr_hdr AssociationSubTypeId_hdr;
+	__le16 AssociationSubTypeId;
+	struct wusb_cbaf_attr_hdr Length_hdr;
+	__le32 Length;
+	struct wusb_cbaf_attr_hdr ConnectionContext_hdr;
+	struct wusb_ckhdid CHID;
+	struct wusb_ckhdid CDID;
+	struct wusb_ckhdid CK;
+	struct wusb_cbaf_attr_hdr BandGroups_hdr;
+	__le16 BandGroups;
+} __attribute__((packed));
+
+/* CC_DATA - Failure case (AMS1.0[T4-10]) */
+struct wusb_cbaf_cc_data_fail {
+	struct wusb_cbaf_attr_hdr AssociationTypeId_hdr;
+	__le16 AssociationTypeId;
+	struct wusb_cbaf_attr_hdr AssociationSubTypeId_hdr;
+	__le16 AssociationSubTypeId;
+	struct wusb_cbaf_attr_hdr Length_hdr;
+	__le16 Length;
+	struct wusb_cbaf_attr_hdr AssociationStatus_hdr;
+	__u32 AssociationStatus;
+} __attribute__((packed));
+
+#endif	/* __LINUX_USB_ASSOCIATION_H */
-- 
cgit v1.2.3


From 30f2f0eb4bd2c43d10a8b0d872c6e5ad8f31c9a0 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Tue, 6 May 2008 22:31:33 +0200
Subject: block: do_mounts - accept root=<non-existant partition>

Some devices, like md, may create partitions only at first access,
so allow root= to be set to a valid non-existent partition of an
existing disk. This applies only to non-initramfs root mounting.

This fixes a regression from 2.6.24 which did allow this to happen and
broke some users' machines :(

Acked-by: Neil Brown <neilb@suse.de>
Tested-by: Joao Luis Meloni Assirati <assirati@nonada.if.usp.br>
Cc: stable <stable@kernel.org>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 block/genhd.c         |  9 ++++++---
 include/linux/genhd.h |  4 ++--
 init/do_mounts.c      | 27 ++++++++++++++++++++++++++-
 3 files changed, 34 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/block/genhd.c b/block/genhd.c
index fda9c7a63c29..129ad939f9dd 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -653,7 +653,7 @@ void genhd_media_change_notify(struct gendisk *disk)
 EXPORT_SYMBOL_GPL(genhd_media_change_notify);
 #endif  /*  0  */
 
-dev_t blk_lookup_devt(const char *name)
+dev_t blk_lookup_devt(const char *name, int part)
 {
 	struct device *dev;
 	dev_t devt = MKDEV(0, 0);
@@ -661,7 +661,11 @@ dev_t blk_lookup_devt(const char *name)
 	mutex_lock(&block_class_lock);
 	list_for_each_entry(dev, &block_class.devices, node) {
 		if (strcmp(dev->bus_id, name) == 0) {
-			devt = dev->devt;
+			struct gendisk *disk = dev_to_disk(dev);
+
+			if (part < disk->minors)
+				devt = MKDEV(MAJOR(dev->devt),
+					     MINOR(dev->devt) + part);
 			break;
 		}
 	}
@@ -669,7 +673,6 @@ dev_t blk_lookup_devt(const char *name)
 
 	return devt;
 }
-
 EXPORT_SYMBOL(blk_lookup_devt);
 
 struct gendisk *alloc_disk(int minors)
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index e9874e7fcdf9..ae7aec3cabee 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -525,7 +525,7 @@ struct unixware_disklabel {
 #define ADDPART_FLAG_RAID	1
 #define ADDPART_FLAG_WHOLEDISK	2
 
-extern dev_t blk_lookup_devt(const char *name);
+extern dev_t blk_lookup_devt(const char *name, int part);
 extern char *disk_name (struct gendisk *hd, int part, char *buf);
 
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
@@ -553,7 +553,7 @@ static inline struct block_device *bdget_disk(struct gendisk *disk, int index)
 
 static inline void printk_all_partitions(void) { }
 
-static inline dev_t blk_lookup_devt(const char *name)
+static inline dev_t blk_lookup_devt(const char *name, int part)
 {
 	dev_t devt = MKDEV(0, 0);
 	return devt;
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 3885e70e7759..660c1e50c91b 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -76,6 +76,7 @@ dev_t name_to_dev_t(char *name)
 	char s[32];
 	char *p;
 	dev_t res = 0;
+	int part;
 
 	if (strncmp(name, "/dev/", 5) != 0) {
 		unsigned maj, min;
@@ -106,7 +107,31 @@ dev_t name_to_dev_t(char *name)
 	for (p = s; *p; p++)
 		if (*p == '/')
 			*p = '!';
-	res = blk_lookup_devt(s);
+	res = blk_lookup_devt(s, 0);
+	if (res)
+		goto done;
+
+	/*
+	 * try non-existent, but valid partition, which may only exist
+	 * after revalidating the disk, like partitioned md devices
+	 */
+	while (p > s && isdigit(p[-1]))
+		p--;
+	if (p == s || !*p || *p == '0')
+		goto fail;
+
+	/* try disk name without <part number> */
+	part = simple_strtoul(p, NULL, 10);
+	*p = '\0';
+	res = blk_lookup_devt(s, part);
+	if (res)
+		goto done;
+
+	/* try disk name without p<part number> */
+	if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')
+		goto fail;
+	p[-1] = '\0';
+	res = blk_lookup_devt(s, part);
 	if (res)
 		goto done;
 
-- 
cgit v1.2.3


From 0a3ad00ca09632c6d0675f606276e92bdf1b306c Mon Sep 17 00:00:00 2001
From: Dave Young <hidave.darkstar@gmail.com>
Date: Fri, 9 May 2008 15:24:08 +0800
Subject: Driver core: struct class remove children list

Now that class_device has been removed, remove the children list as well.

Signed-off-by: Dave Young <hidave.darkstar@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/class.c   | 1 -
 include/linux/device.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/class.c b/drivers/base/class.c
index 0ef00e8d4153..e085af0ff94f 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -140,7 +140,6 @@ int class_register(struct class *cls)
 
 	pr_debug("device class '%s': registering\n", cls->name);
 
-	INIT_LIST_HEAD(&cls->children);
 	INIT_LIST_HEAD(&cls->devices);
 	INIT_LIST_HEAD(&cls->interfaces);
 	kset_init(&cls->class_dirs);
diff --git a/include/linux/device.h b/include/linux/device.h
index 8c23e3dfe3ac..15e9fa3ad3af 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -183,7 +183,6 @@ struct class {
 	struct module		*owner;
 
 	struct kset		subsys;
-	struct list_head	children;
 	struct list_head	devices;
 	struct list_head	interfaces;
 	struct kset		class_dirs;
-- 
cgit v1.2.3


From e0b4eb5193fed5c63413b0c137be29b0477d15ca Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Wed, 14 May 2008 23:06:15 +0200
Subject: make ide-iops.c:SELECT_MASK() static

SELECT_MASK() can now become static.

[bart: remove space between function name and open parenthesis]

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-iops.c | 2 +-
 include/linux/ide.h    | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 57d9a9a79a6f..0daf923541ff 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -95,7 +95,7 @@ void SELECT_DRIVE (ide_drive_t *drive)
 	hwif->OUTB(drive->select.all, hwif->io_ports.device_addr);
 }
 
-void SELECT_MASK (ide_drive_t *drive, int mask)
+static void SELECT_MASK(ide_drive_t *drive, int mask)
 {
 	const struct ide_port_ops *port_ops = drive->hwif->port_ops;
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index b0135b0c3a04..19ec852dffd2 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -965,7 +965,6 @@ typedef struct ide_task_s {
 void ide_tf_dump(const char *, struct ide_taskfile *);
 
 extern void SELECT_DRIVE(ide_drive_t *);
-extern void SELECT_MASK(ide_drive_t *, int);
 
 extern int drive_is_ready(ide_drive_t *);
 
-- 
cgit v1.2.3


From cafa027b8cc6f605ccebc43a960644307a12d8dd Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Wed, 14 May 2008 23:06:16 +0200
Subject: cs5520: disable VDMA

Disable Virtual DMA support for now (it causes system hangs).

Thanks to TAKADA Yoshihito for the help with debugging the problem.

Reported-by: TAKADA Yoshihito <takada@mbf.nifty.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/cs5520.c | 2 +-
 include/linux/ide.h      | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/pci/cs5520.c b/drivers/ide/pci/cs5520.c
index 17669a434438..992b1cf8db69 100644
--- a/drivers/ide/pci/cs5520.c
+++ b/drivers/ide/pci/cs5520.c
@@ -119,6 +119,7 @@ static const struct ide_dma_ops cs5520_dma_ops = {
 	.dma_timeout		= ide_dma_timeout,
 };
 
+/* FIXME: VDMA is disabled because it caused system hangs */
 #define DECLARE_CS_DEV(name_str)				\
 	{							\
 		.name		= name_str,			\
@@ -126,7 +127,6 @@ static const struct ide_dma_ops cs5520_dma_ops = {
 		.dma_ops	= &cs5520_dma_ops,		\
 		.host_flags	= IDE_HFLAG_ISA_PORTS |		\
 				  IDE_HFLAG_CS5520 |		\
-				  IDE_HFLAG_VDMA |		\
 				  IDE_HFLAG_NO_ATAPI_DMA |	\
 				  IDE_HFLAG_ABUSE_SET_DMA_MODE, \
 		.pio_mask	= ATA_PIO4,			\
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 19ec852dffd2..f8f195c20da2 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1057,8 +1057,8 @@ enum {
 	IDE_HFLAG_NO_SET_MODE		= (1 << 9),
 	/* trust BIOS for programming chipset/device for DMA */
 	IDE_HFLAG_TRUST_BIOS_FOR_DMA	= (1 << 10),
-	/* host uses VDMA (tied with IDE_HFLAG_CS5520 for now) */
-	IDE_HFLAG_VDMA			= (1 << 11),
+	/* host is CS5510/CS5520 */
+	IDE_HFLAG_CS5520		= (1 << 11),
 	/* ATAPI DMA is unsupported */
 	IDE_HFLAG_NO_ATAPI_DMA		= (1 << 12),
 	/* set if host is a "non-bootable" controller */
@@ -1069,8 +1069,6 @@ enum {
 	IDE_HFLAG_NO_AUTODMA		= (1 << 15),
 	/* host uses MMIO */
 	IDE_HFLAG_MMIO			= (1 << 16),
-	/* host is CS5510/CS5520 */
-	IDE_HFLAG_CS5520		= IDE_HFLAG_VDMA,
 	/* no LBA48 */
 	IDE_HFLAG_NO_LBA48		= (1 << 17),
 	/* no LBA48 DMA */
@@ -1100,6 +1098,8 @@ enum {
 	IDE_HFLAG_NO_IO_32BIT		= (1 << 30),
 	/* never unmask IRQs */
 	IDE_HFLAG_NO_UNMASK_IRQS	= (1 << 31),
+	/* host uses VDMA (disabled for now) */
+	IDE_HFLAG_VDMA			= 0,
 };
 
 #ifdef CONFIG_BLK_DEV_OFFBOARD
-- 
cgit v1.2.3


From b32a09db4fb9a87246ba4e7726a979ac4709ad97 Mon Sep 17 00:00:00 2001
From: Markus Armbruster <armbru@redhat.com>
Date: Tue, 26 Feb 2008 09:57:11 -0600
Subject: add match_strlcpy() us it to make v9fs make uname and remotename
 parsing more robust

match_strcpy() is a somewhat creepy function: the caller needs to make sure
that the destination buffer is big enough, and when he screws up or
forgets, match_strcpy() happily overruns the buffer.

There's exactly one customer: v9fs_parse_options().  I believe it currently
can't overflow its buffer, but that's not exactly obvious.

The source string is a substring of the mount options.  The kernel silently
truncates those to PAGE_SIZE bytes, including the terminating zero.  See
compat_sys_mount() and do_mount().

The destination buffer is obtained from __getname(), which allocates from
name_cachep, which is initialized by vfs_caches_init() for size PATH_MAX.

We're safe as long as PATH_MAX <= PAGE_SIZE.  PATH_MAX is 4096.  As far as
I know, the smallest PAGE_SIZE is also 4096.

Here's a patch that makes the code a bit more obviously correct.  It
doesn't depend on PATH_MAX <= PAGE_SIZE.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Cc: Jim Meyering <meyering@redhat.com>
Cc: "Randy.Dunlap" <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/v9fs.c           |  4 ++--
 include/linux/parser.h |  2 +-
 lib/parser.c           | 32 ++++++++++++++++++++------------
 3 files changed, 23 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 9b0f0222e8bb..e307fbd34fa0 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -125,10 +125,10 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses)
 			v9ses->afid = option;
 			break;
 		case Opt_uname:
-			match_strcpy(v9ses->uname, &args[0]);
+			match_strlcpy(v9ses->uname, &args[0], PATH_MAX);
 			break;
 		case Opt_remotename:
-			match_strcpy(v9ses->aname, &args[0]);
+			match_strlcpy(v9ses->aname, &args[0], PATH_MAX);
 			break;
 		case Opt_nodevmap:
 			v9ses->nodev = 1;
diff --git a/include/linux/parser.h b/include/linux/parser.h
index 26b2bdfcaf06..7dcd05075756 100644
--- a/include/linux/parser.h
+++ b/include/linux/parser.h
@@ -29,5 +29,5 @@ int match_token(char *, match_table_t table, substring_t args[]);
 int match_int(substring_t *, int *result);
 int match_octal(substring_t *, int *result);
 int match_hex(substring_t *, int *result);
-void match_strcpy(char *, const substring_t *);
+size_t match_strlcpy(char *, const substring_t *, size_t);
 char *match_strdup(const substring_t *);
diff --git a/lib/parser.c b/lib/parser.c
index 703c8c13b346..4f0cbc03e0e8 100644
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -182,18 +182,25 @@ int match_hex(substring_t *s, int *result)
 }
 
 /**
- * match_strcpy: - copies the characters from a substring_t to a string
- * @to: string to copy characters to.
- * @s: &substring_t to copy
+ * match_strlcpy: - Copy the characters from a substring_t to a sized buffer
+ * @dest: where to copy to
+ * @src: &substring_t to copy
+ * @size: size of destination buffer
  *
- * Description: Copies the set of characters represented by the given
- * &substring_t @s to the c-style string @to. Caller guarantees that @to is
- * large enough to hold the characters of @s.
+ * Description: Copy the characters in &substring_t @src to the
+ * c-style string @dest.  Copy no more than @size - 1 characters, plus
+ * the terminating NUL.  Return length of @src.
  */
-void match_strcpy(char *to, const substring_t *s)
+size_t match_strlcpy(char *dest, const substring_t *src, size_t size)
 {
-	memcpy(to, s->from, s->to - s->from);
-	to[s->to - s->from] = '\0';
+	size_t ret = src->to - src->from;
+
+	if (size) {
+		size_t len = ret >= size ? size - 1 : ret;
+		memcpy(dest, src->from, len);
+		dest[len] = '\0';
+	}
+	return ret;
 }
 
 /**
@@ -206,9 +213,10 @@ void match_strcpy(char *to, const substring_t *s)
  */
 char *match_strdup(const substring_t *s)
 {
-	char *p = kmalloc(s->to - s->from + 1, GFP_KERNEL);
+	size_t sz = s->to - s->from + 1;
+	char *p = kmalloc(sz, GFP_KERNEL);
 	if (p)
-		match_strcpy(p, s);
+		match_strlcpy(p, s, sz);
 	return p;
 }
 
@@ -216,5 +224,5 @@ EXPORT_SYMBOL(match_token);
 EXPORT_SYMBOL(match_int);
 EXPORT_SYMBOL(match_octal);
 EXPORT_SYMBOL(match_hex);
-EXPORT_SYMBOL(match_strcpy);
+EXPORT_SYMBOL(match_strlcpy);
 EXPORT_SYMBOL(match_strdup);
-- 
cgit v1.2.3


From 3fc957721d18c93662f7d4dab455b80f53dd2641 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 14 May 2008 16:05:49 -0700
Subject: lib: create common ascii hex array

Add a common hex array in hexdump.c so everyone can use it.

Add a common hi/lo helper to avoid the shifting and masking that is
done to get the upper and lower nibbles of a byte value.

Pull the pack_hex_byte helper from kgdb, as it is open-coded in many
places in the tree that will be consolidated.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sh/kernel/kgdb_stub.c |  8 --------
 drivers/pnp/support.c      |  8 ++++----
 include/linux/kernel.h     | 12 +++++++++++-
 kernel/kgdb.c              |  8 --------
 lib/hexdump.c              |  7 +++++--
 5 files changed, 20 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sh/kernel/kgdb_stub.c b/arch/sh/kernel/kgdb_stub.c
index d453c3a1c79f..832641bbd47d 100644
--- a/arch/sh/kernel/kgdb_stub.c
+++ b/arch/sh/kernel/kgdb_stub.c
@@ -330,14 +330,6 @@ static char *ebin_to_mem(const char *buf, char *mem, int count)
 	return mem;
 }
 
-/* Pack a hex byte */
-static char *pack_hex_byte(char *pkt, int byte)
-{
-	*pkt++ = hexchars[(byte >> 4) & 0xf];
-	*pkt++ = hexchars[(byte & 0xf)];
-	return pkt;
-}
-
 /* Scan for the start char '$', read the packet and check the checksum */
 static void get_packet(char *buffer, int buflen)
 {
diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c
index 3eba85ed729c..95b076c18c07 100644
--- a/drivers/pnp/support.c
+++ b/drivers/pnp/support.c
@@ -45,10 +45,10 @@ void pnp_eisa_id_to_string(u32 id, char *str)
 	str[0] = 'A' + ((id >> 26) & 0x3f) - 1;
 	str[1] = 'A' + ((id >> 21) & 0x1f) - 1;
 	str[2] = 'A' + ((id >> 16) & 0x1f) - 1;
-	str[3] = hex_asc((id >> 12) & 0xf);
-	str[4] = hex_asc((id >>  8) & 0xf);
-	str[5] = hex_asc((id >>  4) & 0xf);
-	str[6] = hex_asc((id >>  0) & 0xf);
+	str[3] = hex_asc_hi(id >> 8);
+	str[4] = hex_asc_lo(id >> 8);
+	str[5] = hex_asc_hi(id);
+	str[6] = hex_asc_lo(id);
 	str[7] = '\0';
 }
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 4d46e299afb5..792bf0aa779b 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -276,7 +276,17 @@ extern void print_hex_dump(const char *level, const char *prefix_str,
 				const void *buf, size_t len, bool ascii);
 extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
 			const void *buf, size_t len);
-#define hex_asc(x)	"0123456789abcdef"[x]
+
+extern const char hex_asc[];
+#define hex_asc_lo(x)	hex_asc[((x) & 0x0f)]
+#define hex_asc_hi(x)	hex_asc[((x) & 0xf0) >> 4]
+
+static inline char *pack_hex_byte(char *buf, u8 byte)
+{
+	*buf++ = hex_asc_hi(byte);
+	*buf++ = hex_asc_lo(byte);
+	return buf;
+}
 
 #define pr_emerg(fmt, arg...) \
 	printk(KERN_EMERG fmt, ##arg)
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 39e31a036f5b..14787de568b3 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -346,14 +346,6 @@ static void put_packet(char *buffer)
 	}
 }
 
-static char *pack_hex_byte(char *pkt, u8 byte)
-{
-	*pkt++ = hexchars[byte >> 4];
-	*pkt++ = hexchars[byte & 0xf];
-
-	return pkt;
-}
-
 /*
  * Convert the memory pointed to by mem into hex, placing result in buf.
  * Return a pointer to the last char put in buf (null). May return an error.
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 343546550dc9..f07c0db81d26 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -12,6 +12,9 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 
+const char hex_asc[] = "0123456789abcdef";
+EXPORT_SYMBOL(hex_asc);
+
 /**
  * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory
  * @buf: data blob to dump
@@ -93,8 +96,8 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 		for (j = 0; (j < rowsize) && (j < len) && (lx + 4) < linebuflen;
 		     j++) {
 			ch = ptr[j];
-			linebuf[lx++] = hex_asc(ch >> 4);
-			linebuf[lx++] = hex_asc(ch & 0x0f);
+			linebuf[lx++] = hex_asc_hi(ch);
+			linebuf[lx++] = hex_asc_lo(ch);
 			linebuf[lx++] = ' ';
 		}
 		ascii_column = 3 * rowsize + 2;
-- 
cgit v1.2.3


From 44c81433e8b05dbc85985d939046f10f95901184 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 14 May 2008 16:05:51 -0700
Subject: per_cpu: fix DEFINE_PER_CPU_SHARED_ALIGNED for modules

The current module loader looks up the ".data.percpu" ELF section to
perform per_cpu relocation.  But DEFINE_PER_CPU_SHARED_ALIGNED() uses
another section (".data.percpu.shared_aligned"), which is currently only
handled in vmlinux.lds, not by the module loader.

To correct this problem, instead of adding logic to the module loader, or
using a build-time module.lds file for all arches to group
".data.percpu.shared_aligned" into ".data.percpu", just use ".data.percpu"
for modules.

Alignment requirements are correctly handled by ld and the module loader.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index d746a2abb322..4cdd393e71e1 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -13,8 +13,14 @@
 	__attribute__((__section__(".data.percpu")))			\
 	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
 
+#ifdef MODULE
+#define SHARED_ALIGNED_SECTION ".data.percpu"
+#else
+#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned"
+#endif
+
 #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name)			\
-	__attribute__((__section__(".data.percpu.shared_aligned")))	\
+	__attribute__((__section__(SHARED_ALIGNED_SECTION)))		\
 	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name		\
 	____cacheline_aligned_in_smp
 #else
-- 
cgit v1.2.3


From a1a61a435b3cc157830b7d42b175151ae5eabdd3 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Wed, 14 May 2008 23:24:09 -0700
Subject: atm: Cleanup atm_tcp.h and atm.h for userspace.

atm_tcp.h uses types from linux/atm.h, but does not include it.
It should also use the standard __u## types from linux/types.h rather
than the uint##_t types, since the former are already provided by the
kernel headers.

Same goes for linux/atm.h.  The linux/socket.h include there also gets
dropped as atm.h does not actually use anything from socket.h.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atm.h     |  7 ++-----
 include/linux/atm_tcp.h | 12 +++++-------
 2 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/atm.h b/include/linux/atm.h
index 60136684e0af..c791ddd96939 100644
--- a/include/linux/atm.h
+++ b/include/linux/atm.h
@@ -16,14 +16,11 @@
  * documentation. Do not change them.
  */
 
-#ifdef __KERNEL__
-#include <linux/socket.h>
-#include <linux/types.h>
-#endif
 #include <linux/compiler.h>
 #include <linux/atmapi.h>
 #include <linux/atmsap.h>
 #include <linux/atmioc.h>
+#include <linux/types.h>
 
 
 /* general ATM constants */
@@ -212,7 +209,7 @@ struct sockaddr_atmsvc {
         char		pub[ATM_E164_LEN+1]; /* public address (E.164) */
     					/* unused addresses must be bzero'ed */
 	char		lij_type;	/* role in LIJ call; one of ATM_LIJ* */
-	uint32_t	lij_id;		/* LIJ call identifier */
+	__u32	lij_id;		/* LIJ call identifier */
     } sas_addr __ATM_API_ALIGN;		/* SVC address */
 };
 
diff --git a/include/linux/atm_tcp.h b/include/linux/atm_tcp.h
index 18787f9b2f19..375638f8554b 100644
--- a/include/linux/atm_tcp.h
+++ b/include/linux/atm_tcp.h
@@ -8,11 +8,9 @@
 #define LINUX_ATM_TCP_H
 
 #include <linux/atmapi.h>
-
-#ifdef __KERNEL__
-#include <linux/types.h>
-#endif
+#include <linux/atm.h>
 #include <linux/atmioc.h>
+#include <linux/types.h>
 
 
 /*
@@ -20,9 +18,9 @@
  */
 
 struct atmtcp_hdr {
-	uint16_t	vpi;
-	uint16_t	vci;
-	uint32_t	length;		/* ... of data part */
+	__u16	vpi;
+	__u16	vci;
+	__u32	length;		/* ... of data part */
 };
 
 /*
-- 
cgit v1.2.3